From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:49:45 +0200 Subject: Adding upstream version 6.1.76. Signed-off-by: Daniel Baumann --- arch/x86/.gitignore | 6 + arch/x86/Kbuild | 32 + arch/x86/Kconfig | 2976 +++++ arch/x86/Kconfig.assembler | 21 + arch/x86/Kconfig.cpu | 519 + arch/x86/Kconfig.debug | 277 + arch/x86/Makefile | 325 + arch/x86/Makefile.um | 61 + arch/x86/Makefile_32.cpu | 53 + arch/x86/boot/.gitignore | 14 + arch/x86/boot/Makefile | 159 + arch/x86/boot/a20.c | 163 + arch/x86/boot/apm.c | 73 + arch/x86/boot/bioscall.S | 79 + arch/x86/boot/bitops.h | 44 + arch/x86/boot/boot.h | 333 + arch/x86/boot/cmdline.c | 156 + arch/x86/boot/compressed/.gitignore | 7 + arch/x86/boot/compressed/Makefile | 161 + arch/x86/boot/compressed/acpi.c | 315 + arch/x86/boot/compressed/cmdline.c | 30 + arch/x86/boot/compressed/cpuflags.c | 9 + arch/x86/boot/compressed/early_serial_console.c | 6 + arch/x86/boot/compressed/efi.c | 234 + arch/x86/boot/compressed/efi.h | 126 + arch/x86/boot/compressed/efi_thunk_64.S | 195 + arch/x86/boot/compressed/error.c | 24 + arch/x86/boot/compressed/error.h | 10 + arch/x86/boot/compressed/head_32.S | 224 + arch/x86/boot/compressed/head_64.S | 1022 ++ arch/x86/boot/compressed/ident_map_64.c | 395 + arch/x86/boot/compressed/idt_64.c | 91 + arch/x86/boot/compressed/idt_handlers_64.S | 77 + arch/x86/boot/compressed/kaslr.c | 873 ++ arch/x86/boot/compressed/kernel_info.S | 22 + arch/x86/boot/compressed/mem_encrypt.S | 198 + arch/x86/boot/compressed/misc.c | 472 + arch/x86/boot/compressed/misc.h | 241 + arch/x86/boot/compressed/mkpiggy.c | 74 + arch/x86/boot/compressed/pgtable.h | 20 + arch/x86/boot/compressed/pgtable_64.c | 218 + arch/x86/boot/compressed/sev.c | 558 + arch/x86/boot/compressed/string.c | 81 + arch/x86/boot/compressed/tdcall.S | 3 + arch/x86/boot/compressed/tdx.c | 77 + arch/x86/boot/compressed/tdx.h | 13 + arch/x86/boot/compressed/vmlinux.lds.S | 117 + arch/x86/boot/copy.S | 65 + arch/x86/boot/cpu.c | 99 + arch/x86/boot/cpucheck.c | 227 + arch/x86/boot/cpuflags.c | 128 + arch/x86/boot/cpuflags.h | 22 + arch/x86/boot/ctype.h | 21 + arch/x86/boot/early_serial_console.c | 154 + arch/x86/boot/edd.c | 180 + arch/x86/boot/genimage.sh | 272 + arch/x86/boot/header.S | 660 ++ arch/x86/boot/install.sh | 37 + arch/x86/boot/io.h | 41 + arch/x86/boot/main.c | 185 + arch/x86/boot/memory.c | 123 + arch/x86/boot/mkcpustr.c | 50 + arch/x86/boot/msr.h | 26 + arch/x86/boot/mtools.conf.in | 21 + arch/x86/boot/pm.c | 124 + arch/x86/boot/pmjump.S | 75 + arch/x86/boot/printf.c | 307 + arch/x86/boot/regs.c | 27 + arch/x86/boot/setup.ld | 66 + arch/x86/boot/string.c | 378 + arch/x86/boot/string.h | 34 + arch/x86/boot/tools/.gitignore | 2 + arch/x86/boot/tools/build.c | 500 + arch/x86/boot/tty.c | 137 + arch/x86/boot/version.c | 20 + arch/x86/boot/vesa.h | 67 + arch/x86/boot/video-bios.c | 126 + arch/x86/boot/video-mode.c | 171 + arch/x86/boot/video-vesa.c | 279 + arch/x86/boot/video-vga.c | 286 + arch/x86/boot/video.c | 343 + arch/x86/boot/video.h | 118 + arch/x86/coco/Makefile | 8 + arch/x86/coco/core.c | 140 + arch/x86/coco/tdx/Makefile | 3 + arch/x86/coco/tdx/tdcall.S | 205 + arch/x86/coco/tdx/tdx.c | 834 ++ arch/x86/configs/i386_defconfig | 285 + arch/x86/configs/tiny.config | 5 + arch/x86/configs/x86_64_defconfig | 279 + arch/x86/configs/xen.config | 27 + arch/x86/crypto/.gitignore | 2 + arch/x86/crypto/Kconfig | 484 + arch/x86/crypto/Makefile | 109 + arch/x86/crypto/aegis128-aesni-asm.S | 748 ++ arch/x86/crypto/aegis128-aesni-glue.c | 291 + arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 597 + arch/x86/crypto/aesni-intel_asm.S | 3161 +++++ arch/x86/crypto/aesni-intel_avx-x86_64.S | 2826 +++++ arch/x86/crypto/aesni-intel_glue.c | 1319 +++ arch/x86/crypto/aria-aesni-avx-asm_64.S | 1304 ++ arch/x86/crypto/aria-avx.h | 16 + arch/x86/crypto/aria_aesni_avx_glue.c | 213 + arch/x86/crypto/blake2s-core.S | 256 + arch/x86/crypto/blake2s-glue.c | 77 + arch/x86/crypto/blowfish-x86_64-asm_64.S | 369 + arch/x86/crypto/blowfish_glue.c | 343 + arch/x86/crypto/camellia-aesni-avx-asm_64.S | 991 ++ arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 1051 ++ arch/x86/crypto/camellia-x86_64-asm_64.S | 499 + arch/x86/crypto/camellia.h | 67 + arch/x86/crypto/camellia_aesni_avx2_glue.c | 138 + arch/x86/crypto/camellia_aesni_avx_glue.c | 137 + arch/x86/crypto/camellia_glue.c | 1417 +++ arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 563 + arch/x86/crypto/cast5_avx_glue.c | 124 + arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 412 + arch/x86/crypto/cast6_avx_glue.c | 124 + arch/x86/crypto/chacha-avx2-x86_64.S | 1021 ++ arch/x86/crypto/chacha-avx512vl-x86_64.S | 836 ++ arch/x86/crypto/chacha-ssse3-x86_64.S | 791 ++ arch/x86/crypto/chacha_glue.c | 317 + arch/x86/crypto/crc32-pclmul_asm.S | 218 + arch/x86/crypto/crc32-pclmul_glue.c | 201 + arch/x86/crypto/crc32c-intel_glue.c | 250 + arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 463 + arch/x86/crypto/crct10dif-pcl-asm_64.S | 333 + arch/x86/crypto/crct10dif-pclmul_glue.c | 143 + arch/x86/crypto/curve25519-x86_64.c | 1724 +++ arch/x86/crypto/des3_ede-asm_64.S | 799 ++ arch/x86/crypto/des3_ede_glue.c | 392 + arch/x86/crypto/ecb_cbc_helpers.h | 76 + arch/x86/crypto/ghash-clmulni-intel_asm.S | 132 + arch/x86/crypto/ghash-clmulni-intel_glue.c | 354 + arch/x86/crypto/glue_helper-asm-avx.S | 36 + arch/x86/crypto/glue_helper-asm-avx2.S | 39 + arch/x86/crypto/nh-avx2-x86_64.S | 157 + arch/x86/crypto/nh-sse2-x86_64.S | 123 + arch/x86/crypto/nhpoly1305-avx2-glue.c | 79 + arch/x86/crypto/nhpoly1305-sse2-glue.c | 78 + arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 4249 +++++++ arch/x86/crypto/poly1305_glue.c | 290 + arch/x86/crypto/polyval-clmulni_asm.S | 321 + arch/x86/crypto/polyval-clmulni_glue.c | 212 + arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 713 ++ arch/x86/crypto/serpent-avx.h | 21 + arch/x86/crypto/serpent-avx2-asm_64.S | 726 ++ arch/x86/crypto/serpent-sse2-i586-asm_32.S | 616 + arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 739 ++ arch/x86/crypto/serpent-sse2.h | 60 + arch/x86/crypto/serpent_avx2_glue.c | 130 + arch/x86/crypto/serpent_avx_glue.c | 131 + arch/x86/crypto/serpent_sse2_glue.c | 131 + arch/x86/crypto/sha1_avx2_x86_64_asm.S | 711 ++ arch/x86/crypto/sha1_ni_asm.S | 305 + arch/x86/crypto/sha1_ssse3_asm.S | 554 + arch/x86/crypto/sha1_ssse3_glue.c | 362 + arch/x86/crypto/sha256-avx-asm.S | 500 + arch/x86/crypto/sha256-avx2-asm.S | 768 ++ arch/x86/crypto/sha256-ssse3-asm.S | 514 + arch/x86/crypto/sha256_ni_asm.S | 356 + arch/x86/crypto/sha256_ssse3_glue.c | 432 + arch/x86/crypto/sha512-avx-asm.S | 423 + arch/x86/crypto/sha512-avx2-asm.S | 749 ++ arch/x86/crypto/sha512-ssse3-asm.S | 425 + arch/x86/crypto/sha512_ssse3_glue.c | 347 + arch/x86/crypto/sm3-avx-asm_64.S | 518 + arch/x86/crypto/sm3_avx_glue.c | 134 + arch/x86/crypto/sm4-aesni-avx-asm_64.S | 595 + arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 502 + arch/x86/crypto/sm4-avx.h | 24 + arch/x86/crypto/sm4_aesni_avx2_glue.c | 169 + arch/x86/crypto/sm4_aesni_avx_glue.c | 487 + arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 376 + arch/x86/crypto/twofish-i586-asm_32.S | 321 + arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 305 + arch/x86/crypto/twofish-x86_64-asm_64.S | 308 + arch/x86/crypto/twofish.h | 21 + arch/x86/crypto/twofish_avx_glue.c | 133 + arch/x86/crypto/twofish_glue.c | 100 + arch/x86/crypto/twofish_glue_3way.c | 168 + arch/x86/entry/Makefile | 23 + arch/x86/entry/calling.h | 422 + arch/x86/entry/common.c | 415 + arch/x86/entry/entry.S | 22 + arch/x86/entry/entry_32.S | 1252 ++ arch/x86/entry/entry_64.S | 1530 +++ arch/x86/entry/entry_64_compat.S | 279 + arch/x86/entry/syscall_32.c | 25 + arch/x86/entry/syscall_64.c | 18 + arch/x86/entry/syscall_x32.c | 18 + arch/x86/entry/syscalls/Makefile | 78 + arch/x86/entry/syscalls/syscall_32.tbl | 457 + arch/x86/entry/syscalls/syscall_64.tbl | 419 + arch/x86/entry/thunk_32.S | 36 + arch/x86/entry/thunk_64.S | 52 + arch/x86/entry/vdso/.gitignore | 8 + arch/x86/entry/vdso/Makefile | 220 + arch/x86/entry/vdso/checkundef.sh | 10 + arch/x86/entry/vdso/extable.c | 46 + arch/x86/entry/vdso/extable.h | 28 + arch/x86/entry/vdso/vclock_gettime.c | 85 + arch/x86/entry/vdso/vdso-layout.lds.S | 111 + arch/x86/entry/vdso/vdso-note.S | 15 + arch/x86/entry/vdso/vdso.lds.S | 35 + arch/x86/entry/vdso/vdso2c.c | 254 + arch/x86/entry/vdso/vdso2c.h | 222 + arch/x86/entry/vdso/vdso32-setup.c | 99 + arch/x86/entry/vdso/vdso32/.gitignore | 2 + arch/x86/entry/vdso/vdso32/note.S | 18 + arch/x86/entry/vdso/vdso32/sigreturn.S | 140 + arch/x86/entry/vdso/vdso32/system_call.S | 85 + arch/x86/entry/vdso/vdso32/vclock_gettime.c | 29 + arch/x86/entry/vdso/vdso32/vdso32.lds.S | 40 + arch/x86/entry/vdso/vdsox32.lds.S | 27 + arch/x86/entry/vdso/vgetcpu.c | 22 + arch/x86/entry/vdso/vma.c | 459 + arch/x86/entry/vdso/vsgx.S | 151 + arch/x86/entry/vsyscall/Makefile | 6 + arch/x86/entry/vsyscall/vsyscall_64.c | 398 + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 39 + arch/x86/entry/vsyscall/vsyscall_trace.h | 30 + arch/x86/events/Kconfig | 55 + arch/x86/events/Makefile | 8 + arch/x86/events/amd/Makefile | 10 + arch/x86/events/amd/brs.c | 434 + arch/x86/events/amd/core.c | 1521 +++ arch/x86/events/amd/ibs.c | 1541 +++ arch/x86/events/amd/iommu.c | 489 + arch/x86/events/amd/iommu.h | 24 + arch/x86/events/amd/lbr.c | 439 + arch/x86/events/amd/power.c | 305 + arch/x86/events/amd/uncore.c | 789 ++ arch/x86/events/core.c | 3029 +++++ arch/x86/events/intel/Makefile | 8 + arch/x86/events/intel/bts.c | 625 + arch/x86/events/intel/core.c | 6668 +++++++++++ arch/x86/events/intel/cstate.c | 793 ++ arch/x86/events/intel/ds.c | 2369 ++++ arch/x86/events/intel/knc.c | 322 + arch/x86/events/intel/lbr.c | 1620 +++ arch/x86/events/intel/p4.c | 1404 +++ arch/x86/events/intel/p6.c | 280 + arch/x86/events/intel/pt.c | 1814 +++ arch/x86/events/intel/pt.h | 132 + arch/x86/events/intel/uncore.c | 1923 +++ arch/x86/events/intel/uncore.h | 620 + arch/x86/events/intel/uncore_discovery.c | 642 + arch/x86/events/intel/uncore_discovery.h | 152 + arch/x86/events/intel/uncore_nhmex.c | 1228 ++ arch/x86/events/intel/uncore_snb.c | 1705 +++ arch/x86/events/intel/uncore_snbep.c | 6118 ++++++++++ arch/x86/events/msr.c | 316 + arch/x86/events/perf_event.h | 1669 +++ arch/x86/events/perf_event_flags.h | 22 + arch/x86/events/probe.c | 63 + arch/x86/events/probe.h | 30 + arch/x86/events/rapl.c | 875 ++ arch/x86/events/utils.c | 252 + arch/x86/events/zhaoxin/Makefile | 2 + arch/x86/events/zhaoxin/core.c | 619 + arch/x86/hyperv/Makefile | 7 + arch/x86/hyperv/hv_apic.c | 317 + arch/x86/hyperv/hv_init.c | 650 + arch/x86/hyperv/hv_proc.c | 213 + arch/x86/hyperv/hv_spinlock.c | 92 + arch/x86/hyperv/irqdomain.c | 364 + arch/x86/hyperv/ivm.c | 389 + arch/x86/hyperv/mmu.c | 243 + arch/x86/hyperv/nested.c | 136 + arch/x86/ia32/Makefile | 9 + arch/x86/ia32/audit.c | 48 + arch/x86/ia32/ia32_signal.c | 374 + arch/x86/include/asm/GEN-for-each-reg.h | 31 + arch/x86/include/asm/Kbuild | 13 + arch/x86/include/asm/acenv.h | 54 + arch/x86/include/asm/acpi.h | 203 + arch/x86/include/asm/acrn.h | 92 + arch/x86/include/asm/agp.h | 32 + arch/x86/include/asm/alternative.h | 394 + arch/x86/include/asm/amd-ibs.h | 152 + arch/x86/include/asm/amd_hsmp.h | 16 + arch/x86/include/asm/amd_nb.h | 125 + arch/x86/include/asm/apic.h | 525 + arch/x86/include/asm/apicdef.h | 438 + arch/x86/include/asm/apm.h | 74 + arch/x86/include/asm/arch_hweight.h | 55 + arch/x86/include/asm/archrandom.h | 62 + arch/x86/include/asm/asm-offsets.h | 1 + arch/x86/include/asm/asm-prototypes.h | 19 + arch/x86/include/asm/asm.h | 225 + arch/x86/include/asm/atomic.h | 272 + arch/x86/include/asm/atomic64_32.h | 342 + arch/x86/include/asm/atomic64_64.h | 254 + arch/x86/include/asm/audit.h | 7 + arch/x86/include/asm/barrier.h | 102 + arch/x86/include/asm/bios_ebda.h | 40 + arch/x86/include/asm/bitops.h | 433 + arch/x86/include/asm/boot.h | 82 + arch/x86/include/asm/bootparam_utils.h | 91 + arch/x86/include/asm/bug.h | 89 + arch/x86/include/asm/bugs.h | 15 + arch/x86/include/asm/cache.h | 24 + arch/x86/include/asm/cacheflush.h | 13 + arch/x86/include/asm/cacheinfo.h | 8 + arch/x86/include/asm/ce4100.h | 7 + arch/x86/include/asm/cfi.h | 22 + arch/x86/include/asm/checksum.h | 13 + arch/x86/include/asm/checksum_32.h | 186 + arch/x86/include/asm/checksum_64.h | 185 + arch/x86/include/asm/clocksource.h | 21 + arch/x86/include/asm/cmdline.h | 9 + arch/x86/include/asm/cmpxchg.h | 261 + arch/x86/include/asm/cmpxchg_32.h | 136 + arch/x86/include/asm/cmpxchg_64.h | 30 + arch/x86/include/asm/coco.h | 32 + arch/x86/include/asm/compat.h | 114 + arch/x86/include/asm/cpu.h | 101 + arch/x86/include/asm/cpu_device_id.h | 196 + arch/x86/include/asm/cpu_entry_area.h | 157 + arch/x86/include/asm/cpufeature.h | 217 + arch/x86/include/asm/cpufeatures.h | 480 + arch/x86/include/asm/cpuid.h | 34 + arch/x86/include/asm/cpuidle_haltpoll.h | 8 + arch/x86/include/asm/cpumask.h | 43 + arch/x86/include/asm/crash.h | 12 + arch/x86/include/asm/current.h | 22 + arch/x86/include/asm/debugreg.h | 156 + arch/x86/include/asm/delay.h | 12 + arch/x86/include/asm/desc.h | 451 + arch/x86/include/asm/desc_defs.h | 145 + arch/x86/include/asm/device.h | 11 + arch/x86/include/asm/disabled-features.h | 117 + arch/x86/include/asm/div64.h | 101 + arch/x86/include/asm/dma-mapping.h | 12 + arch/x86/include/asm/dma.h | 310 + arch/x86/include/asm/dmi.h | 22 + arch/x86/include/asm/doublefault.h | 13 + arch/x86/include/asm/dwarf2.h | 41 + arch/x86/include/asm/e820/api.h | 55 + arch/x86/include/asm/e820/types.h | 113 + arch/x86/include/asm/edac.h | 19 + arch/x86/include/asm/efi.h | 412 + arch/x86/include/asm/elf.h | 400 + arch/x86/include/asm/elfcore-compat.h | 31 + arch/x86/include/asm/emergency-restart.h | 7 + arch/x86/include/asm/emulate_prefix.h | 14 + arch/x86/include/asm/enclu.h | 9 + arch/x86/include/asm/entry-common.h | 99 + arch/x86/include/asm/espfix.h | 18 + arch/x86/include/asm/exec.h | 1 + arch/x86/include/asm/extable.h | 61 + arch/x86/include/asm/extable_fixup_types.h | 69 + arch/x86/include/asm/fb.h | 22 + arch/x86/include/asm/fixmap.h | 200 + arch/x86/include/asm/floppy.h | 281 + arch/x86/include/asm/fpu/api.h | 170 + arch/x86/include/asm/fpu/regset.h | 22 + arch/x86/include/asm/fpu/sched.h | 68 + arch/x86/include/asm/fpu/signal.h | 44 + arch/x86/include/asm/fpu/types.h | 585 + arch/x86/include/asm/fpu/xcr.h | 35 + arch/x86/include/asm/fpu/xstate.h | 132 + arch/x86/include/asm/frame.h | 113 + arch/x86/include/asm/fsgsbase.h | 85 + arch/x86/include/asm/ftrace.h | 129 + arch/x86/include/asm/futex.h | 103 + arch/x86/include/asm/gart.h | 113 + arch/x86/include/asm/genapic.h | 1 + arch/x86/include/asm/geode.h | 33 + arch/x86/include/asm/hardirq.h | 83 + arch/x86/include/asm/highmem.h | 77 + arch/x86/include/asm/hpet.h | 103 + arch/x86/include/asm/hugetlb.h | 10 + arch/x86/include/asm/hw_breakpoint.h | 77 + arch/x86/include/asm/hw_irq.h | 135 + arch/x86/include/asm/hyperv-tlfs.h | 676 ++ arch/x86/include/asm/hypervisor.h | 83 + arch/x86/include/asm/i8259.h | 87 + arch/x86/include/asm/ia32.h | 94 + arch/x86/include/asm/ia32_unistd.h | 12 + arch/x86/include/asm/ibt.h | 116 + arch/x86/include/asm/idtentry.h | 709 ++ arch/x86/include/asm/imr.h | 56 + arch/x86/include/asm/inat.h | 230 + arch/x86/include/asm/inat_types.h | 15 + arch/x86/include/asm/init.h | 17 + arch/x86/include/asm/insn-eval.h | 47 + arch/x86/include/asm/insn.h | 276 + arch/x86/include/asm/inst.h | 148 + arch/x86/include/asm/intel-family.h | 171 + arch/x86/include/asm/intel-mid.h | 44 + arch/x86/include/asm/intel_ds.h | 38 + arch/x86/include/asm/intel_pconfig.h | 65 + arch/x86/include/asm/intel_pt.h | 41 + arch/x86/include/asm/intel_punit_ipc.h | 102 + arch/x86/include/asm/intel_scu_ipc.h | 68 + arch/x86/include/asm/intel_telemetry.h | 139 + arch/x86/include/asm/invpcid.h | 50 + arch/x86/include/asm/io.h | 392 + arch/x86/include/asm/io_apic.h | 218 + arch/x86/include/asm/io_bitmap.h | 52 + arch/x86/include/asm/iomap.h | 22 + arch/x86/include/asm/iommu.h | 39 + arch/x86/include/asm/iosf_mbi.h | 253 + arch/x86/include/asm/irq.h | 52 + arch/x86/include/asm/irq_remapping.h | 72 + arch/x86/include/asm/irq_stack.h | 241 + arch/x86/include/asm/irq_vectors.h | 149 + arch/x86/include/asm/irq_work.h | 20 + arch/x86/include/asm/irqdomain.h | 66 + arch/x86/include/asm/irqflags.h | 142 + arch/x86/include/asm/ist.h | 14 + arch/x86/include/asm/jailhouse_para.h | 26 + arch/x86/include/asm/jump_label.h | 69 + arch/x86/include/asm/kasan.h | 38 + arch/x86/include/asm/kaslr.h | 15 + arch/x86/include/asm/kbdleds.h | 18 + arch/x86/include/asm/kdebug.h | 45 + arch/x86/include/asm/kexec-bzimage64.h | 7 + arch/x86/include/asm/kexec.h | 215 + arch/x86/include/asm/kfence.h | 73 + arch/x86/include/asm/kgdb.h | 92 + arch/x86/include/asm/kmsan.h | 87 + arch/x86/include/asm/kprobes.h | 125 + arch/x86/include/asm/kvm-x86-ops.h | 137 + arch/x86/include/asm/kvm-x86-pmu-ops.h | 31 + arch/x86/include/asm/kvm_host.h | 2118 ++++ arch/x86/include/asm/kvm_page_track.h | 79 + arch/x86/include/asm/kvm_para.h | 179 + arch/x86/include/asm/kvm_types.h | 7 + arch/x86/include/asm/kvm_vcpu_regs.h | 25 + arch/x86/include/asm/kvmclock.h | 21 + arch/x86/include/asm/linkage.h | 97 + arch/x86/include/asm/local.h | 162 + arch/x86/include/asm/mach_timer.h | 49 + arch/x86/include/asm/mach_traps.h | 44 + arch/x86/include/asm/math_emu.h | 15 + arch/x86/include/asm/mc146818rtc.h | 103 + arch/x86/include/asm/mce.h | 350 + arch/x86/include/asm/mem_encrypt.h | 115 + arch/x86/include/asm/memtype.h | 30 + arch/x86/include/asm/microcode.h | 141 + arch/x86/include/asm/microcode_amd.h | 60 + arch/x86/include/asm/microcode_intel.h | 85 + arch/x86/include/asm/misc.h | 7 + arch/x86/include/asm/mmconfig.h | 13 + arch/x86/include/asm/mmu.h | 69 + arch/x86/include/asm/mmu_context.h | 223 + arch/x86/include/asm/mmzone.h | 6 + arch/x86/include/asm/mmzone_32.h | 17 + arch/x86/include/asm/mmzone_64.h | 18 + arch/x86/include/asm/module.h | 16 + arch/x86/include/asm/mpspec.h | 144 + arch/x86/include/asm/mpspec_def.h | 182 + arch/x86/include/asm/mshyperv.h | 254 + arch/x86/include/asm/msi.h | 65 + arch/x86/include/asm/msr-index.h | 1120 ++ arch/x86/include/asm/msr-trace.h | 58 + arch/x86/include/asm/msr.h | 384 + arch/x86/include/asm/mtrr.h | 127 + arch/x86/include/asm/mwait.h | 144 + arch/x86/include/asm/nmi.h | 66 + arch/x86/include/asm/nops.h | 77 + arch/x86/include/asm/nospec-branch.h | 425 + arch/x86/include/asm/numa.h | 84 + arch/x86/include/asm/numa_32.h | 13 + arch/x86/include/asm/numachip/numachip.h | 20 + arch/x86/include/asm/numachip/numachip_csr.h | 98 + arch/x86/include/asm/olpc.h | 102 + arch/x86/include/asm/olpc_ofw.h | 38 + arch/x86/include/asm/orc_lookup.h | 34 + arch/x86/include/asm/orc_types.h | 72 + arch/x86/include/asm/page.h | 92 + arch/x86/include/asm/page_32.h | 35 + arch/x86/include/asm/page_32_types.h | 80 + arch/x86/include/asm/page_64.h | 106 + arch/x86/include/asm/page_64_types.h | 101 + arch/x86/include/asm/page_types.h | 78 + arch/x86/include/asm/paravirt.h | 795 ++ arch/x86/include/asm/paravirt_api_clock.h | 1 + arch/x86/include/asm/paravirt_types.h | 608 + arch/x86/include/asm/parport.h | 11 + arch/x86/include/asm/pc-conf-reg.h | 33 + arch/x86/include/asm/pci-direct.h | 18 + arch/x86/include/asm/pci-functions.h | 20 + arch/x86/include/asm/pci.h | 138 + arch/x86/include/asm/pci_x86.h | 256 + arch/x86/include/asm/percpu.h | 459 + arch/x86/include/asm/perf_event.h | 597 + arch/x86/include/asm/perf_event_p4.h | 877 ++ arch/x86/include/asm/pgalloc.h | 181 + arch/x86/include/asm/pgtable-2level.h | 115 + arch/x86/include/asm/pgtable-2level_types.h | 43 + arch/x86/include/asm/pgtable-3level.h | 292 + arch/x86/include/asm/pgtable-3level_types.h | 48 + arch/x86/include/asm/pgtable-invert.h | 41 + arch/x86/include/asm/pgtable.h | 1469 +++ arch/x86/include/asm/pgtable_32.h | 84 + arch/x86/include/asm/pgtable_32_areas.h | 53 + arch/x86/include/asm/pgtable_32_types.h | 23 + arch/x86/include/asm/pgtable_64.h | 274 + arch/x86/include/asm/pgtable_64_types.h | 216 + arch/x86/include/asm/pgtable_areas.h | 16 + arch/x86/include/asm/pgtable_types.h | 556 + arch/x86/include/asm/pkeys.h | 126 + arch/x86/include/asm/pkru.h | 62 + arch/x86/include/asm/platform_sst_audio.h | 136 + arch/x86/include/asm/pm-trace.h | 24 + arch/x86/include/asm/posix_types.h | 6 + arch/x86/include/asm/preempt.h | 150 + arch/x86/include/asm/probe_roms.h | 9 + arch/x86/include/asm/processor-cyrix.h | 18 + arch/x86/include/asm/processor-flags.h | 56 + arch/x86/include/asm/processor.h | 872 ++ arch/x86/include/asm/prom.h | 37 + arch/x86/include/asm/proto.h | 43 + arch/x86/include/asm/pti.h | 15 + arch/x86/include/asm/ptrace.h | 395 + arch/x86/include/asm/purgatory.h | 11 + arch/x86/include/asm/pvclock-abi.h | 48 + arch/x86/include/asm/pvclock.h | 106 + arch/x86/include/asm/qrwlock.h | 8 + arch/x86/include/asm/qspinlock.h | 114 + arch/x86/include/asm/qspinlock_paravirt.h | 73 + arch/x86/include/asm/realmode.h | 98 + arch/x86/include/asm/reboot.h | 37 + arch/x86/include/asm/reboot_fixups.h | 7 + arch/x86/include/asm/required-features.h | 104 + arch/x86/include/asm/resctrl.h | 108 + arch/x86/include/asm/rmwcc.h | 70 + arch/x86/include/asm/seccomp.h | 41 + arch/x86/include/asm/sections.h | 19 + arch/x86/include/asm/segment.h | 356 + arch/x86/include/asm/serial.h | 30 + arch/x86/include/asm/set_memory.h | 89 + arch/x86/include/asm/setup.h | 151 + arch/x86/include/asm/setup_arch.h | 3 + arch/x86/include/asm/sev-common.h | 171 + arch/x86/include/asm/sev.h | 230 + arch/x86/include/asm/sgx.h | 404 + arch/x86/include/asm/shared/io.h | 34 + arch/x86/include/asm/shared/msr.h | 15 + arch/x86/include/asm/shared/tdx.h | 40 + arch/x86/include/asm/shmparam.h | 7 + arch/x86/include/asm/sigcontext.h | 9 + arch/x86/include/asm/sigframe.h | 88 + arch/x86/include/asm/sighandling.h | 18 + arch/x86/include/asm/signal.h | 110 + arch/x86/include/asm/simd.h | 12 + arch/x86/include/asm/smap.h | 70 + arch/x86/include/asm/smp.h | 207 + arch/x86/include/asm/softirq_stack.h | 11 + arch/x86/include/asm/sparsemem.h | 45 + arch/x86/include/asm/spec-ctrl.h | 88 + arch/x86/include/asm/special_insns.h | 309 + arch/x86/include/asm/spinlock.h | 45 + arch/x86/include/asm/spinlock_types.h | 9 + arch/x86/include/asm/sta2x11.h | 13 + arch/x86/include/asm/stackprotector.h | 95 + arch/x86/include/asm/stacktrace.h | 114 + arch/x86/include/asm/static_call.h | 68 + arch/x86/include/asm/string.h | 6 + arch/x86/include/asm/string_32.h | 230 + arch/x86/include/asm/string_64.h | 132 + arch/x86/include/asm/suspend.h | 14 + arch/x86/include/asm/suspend_32.h | 37 + arch/x86/include/asm/suspend_64.h | 64 + arch/x86/include/asm/svm.h | 637 + arch/x86/include/asm/switch_to.h | 91 + arch/x86/include/asm/sync_bitops.h | 118 + arch/x86/include/asm/sync_core.h | 111 + arch/x86/include/asm/syscall.h | 135 + arch/x86/include/asm/syscall_wrapper.h | 290 + arch/x86/include/asm/syscalls.h | 15 + arch/x86/include/asm/tdx.h | 91 + arch/x86/include/asm/text-patching.h | 219 + arch/x86/include/asm/thermal.h | 15 + arch/x86/include/asm/thread_info.h | 237 + arch/x86/include/asm/time.h | 14 + arch/x86/include/asm/timer.h | 38 + arch/x86/include/asm/timex.h | 22 + arch/x86/include/asm/tlb.h | 37 + arch/x86/include/asm/tlbbatch.h | 15 + arch/x86/include/asm/tlbflush.h | 367 + arch/x86/include/asm/topology.h | 230 + arch/x86/include/asm/trace/common.h | 12 + arch/x86/include/asm/trace/exceptions.h | 54 + arch/x86/include/asm/trace/fpu.h | 99 + arch/x86/include/asm/trace/hyperv.h | 98 + arch/x86/include/asm/trace/irq_vectors.h | 383 + arch/x86/include/asm/trace_clock.h | 21 + arch/x86/include/asm/trap_pf.h | 26 + arch/x86/include/asm/trapnr.h | 32 + arch/x86/include/asm/traps.h | 50 + arch/x86/include/asm/tsc.h | 73 + arch/x86/include/asm/uaccess.h | 672 ++ arch/x86/include/asm/uaccess_32.h | 36 + arch/x86/include/asm/uaccess_64.h | 127 + arch/x86/include/asm/umip.h | 12 + arch/x86/include/asm/unistd.h | 61 + arch/x86/include/asm/unwind.h | 154 + arch/x86/include/asm/unwind_hints.h | 74 + arch/x86/include/asm/uprobes.h | 58 + arch/x86/include/asm/user.h | 64 + arch/x86/include/asm/user32.h | 71 + arch/x86/include/asm/user_32.h | 128 + arch/x86/include/asm/user_64.h | 134 + arch/x86/include/asm/uv/bios.h | 213 + arch/x86/include/asm/uv/uv.h | 44 + arch/x86/include/asm/uv/uv_geo.h | 103 + arch/x86/include/asm/uv/uv_hub.h | 779 ++ arch/x86/include/asm/uv/uv_irq.h | 38 + arch/x86/include/asm/uv/uv_mmrs.h | 4637 ++++++++ arch/x86/include/asm/vdso.h | 57 + arch/x86/include/asm/vdso/clocksource.h | 12 + arch/x86/include/asm/vdso/gettimeofday.h | 323 + arch/x86/include/asm/vdso/processor.h | 23 + arch/x86/include/asm/vdso/vsyscall.h | 29 + arch/x86/include/asm/vermagic.h | 68 + arch/x86/include/asm/vga.h | 33 + arch/x86/include/asm/vgtod.h | 24 + arch/x86/include/asm/virtext.h | 148 + arch/x86/include/asm/vm86.h | 91 + arch/x86/include/asm/vmalloc.h | 26 + arch/x86/include/asm/vmware.h | 57 + arch/x86/include/asm/vmx.h | 630 + arch/x86/include/asm/vmxfeatures.h | 92 + arch/x86/include/asm/vsyscall.h | 27 + arch/x86/include/asm/vvar.h | 55 + arch/x86/include/asm/word-at-a-time.h | 94 + arch/x86/include/asm/x86_init.h | 330 + arch/x86/include/asm/xen/cpuid.h | 125 + arch/x86/include/asm/xen/events.h | 38 + arch/x86/include/asm/xen/hypercall.h | 504 + arch/x86/include/asm/xen/hypervisor.h | 64 + arch/x86/include/asm/xen/interface.h | 390 + arch/x86/include/asm/xen/interface_32.h | 103 + arch/x86/include/asm/xen/interface_64.h | 149 + arch/x86/include/asm/xen/page.h | 357 + arch/x86/include/asm/xen/pci.h | 67 + arch/x86/include/asm/xen/swiotlb-xen.h | 17 + arch/x86/include/asm/xen/trace_types.h | 19 + arch/x86/include/asm/xor.h | 502 + arch/x86/include/asm/xor_32.h | 573 + arch/x86/include/asm/xor_64.h | 28 + arch/x86/include/asm/xor_avx.h | 178 + arch/x86/include/uapi/asm/Kbuild | 4 + arch/x86/include/uapi/asm/a.out.h | 21 + arch/x86/include/uapi/asm/amd_hsmp.h | 307 + arch/x86/include/uapi/asm/auxvec.h | 20 + arch/x86/include/uapi/asm/bitsperlong.h | 14 + arch/x86/include/uapi/asm/boot.h | 11 + arch/x86/include/uapi/asm/bootparam.h | 284 + arch/x86/include/uapi/asm/byteorder.h | 7 + arch/x86/include/uapi/asm/debugreg.h | 82 + arch/x86/include/uapi/asm/e820.h | 82 + arch/x86/include/uapi/asm/hw_breakpoint.h | 2 + arch/x86/include/uapi/asm/hwcap2.h | 13 + arch/x86/include/uapi/asm/ist.h | 30 + arch/x86/include/uapi/asm/kvm.h | 535 + arch/x86/include/uapi/asm/kvm_para.h | 153 + arch/x86/include/uapi/asm/kvm_perf.h | 17 + arch/x86/include/uapi/asm/ldt.h | 48 + arch/x86/include/uapi/asm/mce.h | 45 + arch/x86/include/uapi/asm/mman.h | 17 + arch/x86/include/uapi/asm/msgbuf.h | 35 + arch/x86/include/uapi/asm/msr.h | 14 + arch/x86/include/uapi/asm/mtrr.h | 124 + arch/x86/include/uapi/asm/perf_regs.h | 58 + arch/x86/include/uapi/asm/posix_types.h | 10 + arch/x86/include/uapi/asm/posix_types_32.h | 26 + arch/x86/include/uapi/asm/posix_types_64.h | 20 + arch/x86/include/uapi/asm/posix_types_x32.h | 20 + arch/x86/include/uapi/asm/prctl.h | 23 + arch/x86/include/uapi/asm/processor-flags.h | 168 + arch/x86/include/uapi/asm/ptrace-abi.h | 94 + arch/x86/include/uapi/asm/ptrace.h | 86 + arch/x86/include/uapi/asm/sembuf.h | 36 + arch/x86/include/uapi/asm/setup.h | 1 + arch/x86/include/uapi/asm/sgx.h | 232 + arch/x86/include/uapi/asm/shmbuf.h | 47 + arch/x86/include/uapi/asm/sigcontext.h | 389 + arch/x86/include/uapi/asm/sigcontext32.h | 9 + arch/x86/include/uapi/asm/siginfo.h | 15 + arch/x86/include/uapi/asm/signal.h | 112 + arch/x86/include/uapi/asm/stat.h | 138 + arch/x86/include/uapi/asm/statfs.h | 13 + arch/x86/include/uapi/asm/svm.h | 243 + arch/x86/include/uapi/asm/swab.h | 37 + arch/x86/include/uapi/asm/ucontext.h | 56 + arch/x86/include/uapi/asm/unistd.h | 25 + arch/x86/include/uapi/asm/vm86.h | 130 + arch/x86/include/uapi/asm/vmx.h | 167 + arch/x86/include/uapi/asm/vsyscall.h | 13 + arch/x86/kernel/.gitignore | 4 + arch/x86/kernel/Makefile | 156 + arch/x86/kernel/acpi/Makefile | 11 + arch/x86/kernel/acpi/apei.c | 50 + arch/x86/kernel/acpi/boot.c | 1898 +++ arch/x86/kernel/acpi/cppc.c | 118 + arch/x86/kernel/acpi/cstate.c | 230 + arch/x86/kernel/acpi/sleep.c | 181 + arch/x86/kernel/acpi/sleep.h | 22 + arch/x86/kernel/acpi/wakeup_32.S | 101 + arch/x86/kernel/acpi/wakeup_64.S | 143 + arch/x86/kernel/alternative.c | 1767 +++ arch/x86/kernel/amd_gart_64.c | 842 ++ arch/x86/kernel/amd_nb.c | 524 + arch/x86/kernel/aperture_64.c | 562 + arch/x86/kernel/apic/Makefile | 30 + arch/x86/kernel/apic/apic.c | 3010 +++++ arch/x86/kernel/apic/apic_common.c | 46 + arch/x86/kernel/apic/apic_flat_64.c | 247 + arch/x86/kernel/apic/apic_noop.c | 141 + arch/x86/kernel/apic/apic_numachip.c | 334 + arch/x86/kernel/apic/bigsmp_32.c | 189 + arch/x86/kernel/apic/hw_nmi.c | 59 + arch/x86/kernel/apic/io_apic.c | 3117 +++++ arch/x86/kernel/apic/ipi.c | 331 + arch/x86/kernel/apic/local.h | 69 + arch/x86/kernel/apic/msi.c | 348 + arch/x86/kernel/apic/probe_32.c | 209 + arch/x86/kernel/apic/probe_64.c | 52 + arch/x86/kernel/apic/vector.c | 1340 +++ arch/x86/kernel/apic/x2apic_cluster.c | 241 + arch/x86/kernel/apic/x2apic_phys.c | 203 + arch/x86/kernel/apic/x2apic_uv_x.c | 1839 +++ arch/x86/kernel/apm_32.c | 2439 ++++ arch/x86/kernel/asm-offsets.c | 110 + arch/x86/kernel/asm-offsets_32.c | 58 + arch/x86/kernel/asm-offsets_64.c | 64 + arch/x86/kernel/audit_64.c | 85 + arch/x86/kernel/bootflag.c | 102 + arch/x86/kernel/cfi.c | 86 + arch/x86/kernel/check.c | 187 + arch/x86/kernel/cpu/.gitignore | 2 + arch/x86/kernel/cpu/Makefile | 70 + arch/x86/kernel/cpu/acrn.c | 81 + arch/x86/kernel/cpu/amd.c | 1309 ++ arch/x86/kernel/cpu/aperfmperf.c | 460 + arch/x86/kernel/cpu/bugs.c | 2827 +++++ arch/x86/kernel/cpu/cacheinfo.c | 1042 ++ arch/x86/kernel/cpu/centaur.c | 251 + arch/x86/kernel/cpu/common.c | 2463 ++++ arch/x86/kernel/cpu/cpu.h | 90 + arch/x86/kernel/cpu/cpuid-deps.c | 140 + arch/x86/kernel/cpu/cyrix.c | 467 + arch/x86/kernel/cpu/feat_ctl.c | 212 + arch/x86/kernel/cpu/hygon.c | 394 + arch/x86/kernel/cpu/hypervisor.c | 109 + arch/x86/kernel/cpu/intel.c | 1427 +++ arch/x86/kernel/cpu/intel_epb.c | 235 + arch/x86/kernel/cpu/intel_pconfig.c | 82 + arch/x86/kernel/cpu/match.c | 91 + arch/x86/kernel/cpu/mce/Makefile | 14 + arch/x86/kernel/cpu/mce/amd.c | 1378 +++ arch/x86/kernel/cpu/mce/apei.c | 216 + arch/x86/kernel/cpu/mce/core.c | 2874 +++++ arch/x86/kernel/cpu/mce/dev-mcelog.c | 374 + arch/x86/kernel/cpu/mce/genpool.c | 147 + arch/x86/kernel/cpu/mce/inject.c | 801 ++ arch/x86/kernel/cpu/mce/intel.c | 521 + arch/x86/kernel/cpu/mce/internal.h | 236 + arch/x86/kernel/cpu/mce/p5.c | 66 + arch/x86/kernel/cpu/mce/severity.c | 477 + arch/x86/kernel/cpu/mce/threshold.c | 31 + arch/x86/kernel/cpu/mce/winchip.c | 41 + arch/x86/kernel/cpu/microcode/Makefile | 5 + arch/x86/kernel/cpu/microcode/amd.c | 972 ++ arch/x86/kernel/cpu/microcode/core.c | 810 ++ arch/x86/kernel/cpu/microcode/intel.c | 941 ++ arch/x86/kernel/cpu/mkcapflags.sh | 74 + arch/x86/kernel/cpu/mshyperv.c | 502 + arch/x86/kernel/cpu/mtrr/Makefile | 4 + arch/x86/kernel/cpu/mtrr/amd.c | 125 + arch/x86/kernel/cpu/mtrr/centaur.c | 127 + arch/x86/kernel/cpu/mtrr/cleanup.c | 987 ++ arch/x86/kernel/cpu/mtrr/cyrix.c | 284 + arch/x86/kernel/cpu/mtrr/generic.c | 924 ++ arch/x86/kernel/cpu/mtrr/if.c | 425 + arch/x86/kernel/cpu/mtrr/mtrr.c | 887 ++ arch/x86/kernel/cpu/mtrr/mtrr.h | 80 + arch/x86/kernel/cpu/perfctr-watchdog.c | 162 + arch/x86/kernel/cpu/powerflags.c | 24 + arch/x86/kernel/cpu/proc.c | 177 + arch/x86/kernel/cpu/rdrand.c | 49 + arch/x86/kernel/cpu/resctrl/Makefile | 4 + arch/x86/kernel/cpu/resctrl/core.c | 950 ++ arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 587 + arch/x86/kernel/cpu/resctrl/internal.h | 542 + arch/x86/kernel/cpu/resctrl/monitor.c | 822 ++ arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 1600 +++ arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h | 43 + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3480 ++++++ arch/x86/kernel/cpu/scattered.c | 73 + arch/x86/kernel/cpu/sgx/Makefile | 6 + arch/x86/kernel/cpu/sgx/driver.c | 180 + arch/x86/kernel/cpu/sgx/driver.h | 29 + arch/x86/kernel/cpu/sgx/encl.c | 1323 +++ arch/x86/kernel/cpu/sgx/encl.h | 129 + arch/x86/kernel/cpu/sgx/encls.h | 236 + arch/x86/kernel/cpu/sgx/ioctl.c | 1263 ++ arch/x86/kernel/cpu/sgx/main.c | 963 ++ arch/x86/kernel/cpu/sgx/sgx.h | 107 + arch/x86/kernel/cpu/sgx/virt.c | 435 + arch/x86/kernel/cpu/topology.c | 168 + arch/x86/kernel/cpu/transmeta.c | 111 + arch/x86/kernel/cpu/tsx.c | 257 + arch/x86/kernel/cpu/umc.c | 26 + arch/x86/kernel/cpu/umwait.c | 238 + arch/x86/kernel/cpu/vmware.c | 528 + arch/x86/kernel/cpu/vortex.c | 39 + arch/x86/kernel/cpu/zhaoxin.c | 133 + arch/x86/kernel/cpuid.c | 190 + arch/x86/kernel/crash.c | 366 + arch/x86/kernel/crash_core_32.c | 17 + arch/x86/kernel/crash_core_64.c | 24 + arch/x86/kernel/crash_dump_32.c | 47 + arch/x86/kernel/crash_dump_64.c | 64 + arch/x86/kernel/devicetree.c | 319 + arch/x86/kernel/doublefault_32.c | 128 + arch/x86/kernel/dumpstack.c | 479 + arch/x86/kernel/dumpstack_32.c | 155 + arch/x86/kernel/dumpstack_64.c | 221 + arch/x86/kernel/e820.c | 1350 +++ arch/x86/kernel/early-quirks.c | 813 ++ arch/x86/kernel/early_printk.c | 398 + arch/x86/kernel/ebda.c | 98 + arch/x86/kernel/eisa.c | 24 + arch/x86/kernel/espfix_64.c | 207 + arch/x86/kernel/fpu/Makefile | 6 + arch/x86/kernel/fpu/bugs.c | 59 + arch/x86/kernel/fpu/context.h | 82 + arch/x86/kernel/fpu/core.c | 865 ++ arch/x86/kernel/fpu/init.c | 229 + arch/x86/kernel/fpu/internal.h | 28 + arch/x86/kernel/fpu/legacy.h | 111 + arch/x86/kernel/fpu/regset.c | 386 + arch/x86/kernel/fpu/signal.c | 542 + arch/x86/kernel/fpu/xstate.c | 1843 +++ arch/x86/kernel/fpu/xstate.h | 327 + arch/x86/kernel/ftrace.c | 666 ++ arch/x86/kernel/ftrace_32.S | 193 + arch/x86/kernel/ftrace_64.S | 344 + arch/x86/kernel/head32.c | 119 + arch/x86/kernel/head64.c | 637 + arch/x86/kernel/head_32.S | 583 + arch/x86/kernel/head_64.S | 674 ++ arch/x86/kernel/hpet.c | 1471 +++ arch/x86/kernel/hw_breakpoint.c | 592 + arch/x86/kernel/i8237.c | 76 + arch/x86/kernel/i8253.c | 67 + arch/x86/kernel/i8259.c | 457 + arch/x86/kernel/idt.c | 344 + arch/x86/kernel/io_delay.c | 148 + arch/x86/kernel/ioport.c | 215 + arch/x86/kernel/irq.c | 399 + arch/x86/kernel/irq_32.c | 163 + arch/x86/kernel/irq_64.c | 76 + arch/x86/kernel/irq_work.c | 34 + arch/x86/kernel/irqflags.S | 17 + arch/x86/kernel/irqinit.c | 107 + arch/x86/kernel/itmt.c | 205 + arch/x86/kernel/jailhouse.c | 295 + arch/x86/kernel/jump_label.c | 148 + arch/x86/kernel/kdebugfs.c | 195 + arch/x86/kernel/kexec-bzimage64.c | 605 + arch/x86/kernel/kgdb.c | 785 ++ arch/x86/kernel/kprobes/Makefile | 8 + arch/x86/kernel/kprobes/common.h | 109 + arch/x86/kernel/kprobes/core.c | 1047 ++ arch/x86/kernel/kprobes/ftrace.c | 70 + arch/x86/kernel/kprobes/opt.c | 563 + arch/x86/kernel/ksysfs.c | 401 + arch/x86/kernel/kvm.c | 1158 ++ arch/x86/kernel/kvmclock.c | 349 + arch/x86/kernel/ldt.c | 694 ++ arch/x86/kernel/machine_kexec_32.c | 240 + arch/x86/kernel/machine_kexec_64.c | 602 + arch/x86/kernel/mmconf-fam10h_64.c | 238 + arch/x86/kernel/module.c | 323 + arch/x86/kernel/mpparse.c | 945 ++ arch/x86/kernel/msr.c | 333 + arch/x86/kernel/nmi.c | 555 + arch/x86/kernel/nmi_selftest.c | 184 + arch/x86/kernel/paravirt-spinlocks.c | 43 + arch/x86/kernel/paravirt.c | 403 + arch/x86/kernel/pci-dma.c | 228 + arch/x86/kernel/pcspeaker.c | 14 + arch/x86/kernel/perf_regs.c | 202 + arch/x86/kernel/platform-quirks.c | 46 + arch/x86/kernel/pmem.c | 37 + arch/x86/kernel/probe_roms.c | 280 + arch/x86/kernel/process.c | 1042 ++ arch/x86/kernel/process.h | 39 + arch/x86/kernel/process_32.c | 223 + arch/x86/kernel/process_64.c | 863 ++ arch/x86/kernel/ptrace.c | 1369 +++ arch/x86/kernel/pvclock.c | 156 + arch/x86/kernel/quirks.c | 669 ++ arch/x86/kernel/reboot.c | 965 ++ arch/x86/kernel/reboot_fixups_32.c | 103 + arch/x86/kernel/relocate_kernel_32.S | 291 + arch/x86/kernel/relocate_kernel_64.S | 316 + arch/x86/kernel/resource.c | 64 + arch/x86/kernel/rethook.c | 127 + arch/x86/kernel/rtc.c | 162 + arch/x86/kernel/setup.c | 1359 +++ arch/x86/kernel/setup_percpu.c | 247 + arch/x86/kernel/sev-shared.c | 1069 ++ arch/x86/kernel/sev.c | 2304 ++++ arch/x86/kernel/sev_verify_cbit.S | 89 + arch/x86/kernel/signal.c | 1006 ++ arch/x86/kernel/signal_compat.c | 191 + arch/x86/kernel/smp.c | 298 + arch/x86/kernel/smpboot.c | 1917 +++ arch/x86/kernel/stacktrace.c | 130 + arch/x86/kernel/static_call.c | 212 + arch/x86/kernel/step.c | 242 + arch/x86/kernel/sys_ia32.c | 256 + arch/x86/kernel/sys_x86_64.c | 228 + arch/x86/kernel/tboot.c | 517 + arch/x86/kernel/time.c | 129 + arch/x86/kernel/tls.c | 295 + arch/x86/kernel/tls.h | 18 + arch/x86/kernel/topology.c | 162 + arch/x86/kernel/trace.c | 234 + arch/x86/kernel/trace_clock.c | 17 + arch/x86/kernel/tracepoint.c | 21 + arch/x86/kernel/traps.c | 1471 +++ arch/x86/kernel/tsc.c | 1577 +++ arch/x86/kernel/tsc_msr.c | 236 + arch/x86/kernel/tsc_sync.c | 534 + arch/x86/kernel/umip.c | 411 + arch/x86/kernel/unwind_frame.c | 419 + arch/x86/kernel/unwind_guess.c | 72 + arch/x86/kernel/unwind_orc.c | 726 ++ arch/x86/kernel/uprobes.c | 1099 ++ arch/x86/kernel/verify_cpu.S | 140 + arch/x86/kernel/vm86_32.c | 832 ++ arch/x86/kernel/vmlinux.lds.S | 539 + arch/x86/kernel/vsmp_64.c | 153 + arch/x86/kernel/x86_init.c | 166 + arch/x86/kvm/.gitignore | 2 + arch/x86/kvm/Kconfig | 133 + arch/x86/kvm/Makefile | 48 + arch/x86/kvm/cpuid.c | 1507 +++ arch/x86/kvm/cpuid.h | 235 + arch/x86/kvm/debugfs.c | 194 + arch/x86/kvm/emulate.c | 5853 +++++++++ arch/x86/kvm/fpu.h | 140 + arch/x86/kvm/hyperv.c | 2582 ++++ arch/x86/kvm/hyperv.h | 154 + arch/x86/kvm/i8254.c | 751 ++ arch/x86/kvm/i8254.h | 65 + arch/x86/kvm/i8259.c | 655 + arch/x86/kvm/ioapic.c | 745 ++ arch/x86/kvm/ioapic.h | 123 + arch/x86/kvm/irq.c | 167 + arch/x86/kvm/irq.h | 114 + arch/x86/kvm/irq_comm.c | 440 + arch/x86/kvm/kvm-asm-offsets.c | 29 + arch/x86/kvm/kvm_cache_regs.h | 208 + arch/x86/kvm/kvm_emulate.h | 529 + arch/x86/kvm/kvm_onhyperv.c | 108 + arch/x86/kvm/kvm_onhyperv.h | 25 + arch/x86/kvm/lapic.c | 3098 +++++ arch/x86/kvm/lapic.h | 277 + arch/x86/kvm/mmu.h | 305 + arch/x86/kvm/mmu/mmu.c | 6961 +++++++++++ arch/x86/kvm/mmu/mmu_internal.h | 320 + arch/x86/kvm/mmu/mmutrace.h | 451 + arch/x86/kvm/mmu/page_track.c | 302 + arch/x86/kvm/mmu/paging_tmpl.h | 1116 ++ arch/x86/kvm/mmu/spte.c | 507 + arch/x86/kvm/mmu/spte.h | 474 + arch/x86/kvm/mmu/tdp_iter.c | 180 + arch/x86/kvm/mmu/tdp_iter.h | 118 + arch/x86/kvm/mmu/tdp_mmu.c | 1885 +++ arch/x86/kvm/mmu/tdp_mmu.h | 95 + arch/x86/kvm/mtrr.c | 721 ++ arch/x86/kvm/pmu.c | 623 + arch/x86/kvm/pmu.h | 225 + arch/x86/kvm/reverse_cpuid.h | 187 + arch/x86/kvm/svm/avic.c | 1259 ++ arch/x86/kvm/svm/hyperv.h | 13 + arch/x86/kvm/svm/nested.c | 1716 +++ arch/x86/kvm/svm/pmu.c | 232 + arch/x86/kvm/svm/sev.c | 3076 +++++ arch/x86/kvm/svm/svm.c | 5172 ++++++++ arch/x86/kvm/svm/svm.h | 718 ++ arch/x86/kvm/svm/svm_onhyperv.c | 40 + arch/x86/kvm/svm/svm_onhyperv.h | 117 + arch/x86/kvm/svm/svm_ops.h | 64 + arch/x86/kvm/svm/vmenter.S | 392 + arch/x86/kvm/trace.h | 1834 +++ arch/x86/kvm/tss.h | 60 + arch/x86/kvm/vmx/capabilities.h | 404 + arch/x86/kvm/vmx/evmcs.c | 509 + arch/x86/kvm/vmx/evmcs.h | 234 + arch/x86/kvm/vmx/nested.c | 7028 +++++++++++ arch/x86/kvm/vmx/nested.h | 292 + arch/x86/kvm/vmx/pmu_intel.c | 814 ++ arch/x86/kvm/vmx/posted_intr.c | 351 + arch/x86/kvm/vmx/posted_intr.h | 106 + arch/x86/kvm/vmx/run_flags.h | 8 + arch/x86/kvm/vmx/sgx.c | 498 + arch/x86/kvm/vmx/sgx.h | 34 + arch/x86/kvm/vmx/vmcs.h | 193 + arch/x86/kvm/vmx/vmcs12.c | 154 + arch/x86/kvm/vmx/vmcs12.h | 430 + arch/x86/kvm/vmx/vmcs_shadow_fields.h | 79 + arch/x86/kvm/vmx/vmenter.S | 352 + arch/x86/kvm/vmx/vmx.c | 8628 ++++++++++++++ arch/x86/kvm/vmx/vmx.h | 773 ++ arch/x86/kvm/vmx/vmx_ops.h | 347 + arch/x86/kvm/x86.c | 13822 ++++++++++++++++++++++ arch/x86/kvm/x86.h | 489 + arch/x86/kvm/xen.c | 1899 +++ arch/x86/kvm/xen.h | 210 + arch/x86/lib/.gitignore | 2 + arch/x86/lib/Makefile | 75 + arch/x86/lib/atomic64_32.c | 4 + arch/x86/lib/atomic64_386_32.S | 195 + arch/x86/lib/atomic64_cx8_32.S | 180 + arch/x86/lib/cache-smp.c | 21 + arch/x86/lib/checksum_32.S | 444 + arch/x86/lib/clear_page_64.S | 190 + arch/x86/lib/cmdline.c | 214 + arch/x86/lib/cmpxchg16b_emu.S | 47 + arch/x86/lib/cmpxchg8b_emu.S | 46 + arch/x86/lib/copy_mc.c | 92 + arch/x86/lib/copy_mc_64.S | 149 + arch/x86/lib/copy_page_64.S | 89 + arch/x86/lib/copy_user_64.S | 410 + arch/x86/lib/cpu.c | 38 + arch/x86/lib/csum-copy_64.S | 256 + arch/x86/lib/csum-partial_64.c | 123 + arch/x86/lib/csum-wrappers_64.c | 97 + arch/x86/lib/delay.c | 231 + arch/x86/lib/error-inject.c | 24 + arch/x86/lib/getuser.S | 206 + arch/x86/lib/hweight.S | 83 + arch/x86/lib/inat.c | 83 + arch/x86/lib/insn-eval.c | 1670 +++ arch/x86/lib/insn.c | 755 ++ arch/x86/lib/iomap_copy_64.S | 15 + arch/x86/lib/iomem.c | 123 + arch/x86/lib/kaslr.c | 98 + arch/x86/lib/memcpy_32.c | 206 + arch/x86/lib/memcpy_64.S | 187 + arch/x86/lib/memmove_64.S | 217 + arch/x86/lib/memset_64.S | 140 + arch/x86/lib/misc.c | 22 + arch/x86/lib/msr-reg-export.c | 6 + arch/x86/lib/msr-reg.S | 93 + arch/x86/lib/msr-smp.c | 279 + arch/x86/lib/msr.c | 138 + arch/x86/lib/pc-conf-reg.c | 13 + arch/x86/lib/putuser.S | 122 + arch/x86/lib/retpoline.S | 270 + arch/x86/lib/string_32.c | 237 + arch/x86/lib/strstr_32.c | 33 + arch/x86/lib/usercopy.c | 55 + arch/x86/lib/usercopy_32.c | 340 + arch/x86/lib/usercopy_64.c | 148 + arch/x86/lib/x86-opcode-map.txt | 1188 ++ arch/x86/math-emu/Makefile | 30 + arch/x86/math-emu/README | 427 + arch/x86/math-emu/control_w.h | 46 + arch/x86/math-emu/div_Xsig.S | 367 + arch/x86/math-emu/div_small.S | 48 + arch/x86/math-emu/errors.c | 686 ++ arch/x86/math-emu/exception.h | 51 + arch/x86/math-emu/fpu_arith.c | 153 + arch/x86/math-emu/fpu_asm.h | 32 + arch/x86/math-emu/fpu_aux.c | 267 + arch/x86/math-emu/fpu_emu.h | 218 + arch/x86/math-emu/fpu_entry.c | 717 ++ arch/x86/math-emu/fpu_etc.c | 131 + arch/x86/math-emu/fpu_proto.h | 157 + arch/x86/math-emu/fpu_system.h | 130 + arch/x86/math-emu/fpu_tags.c | 116 + arch/x86/math-emu/fpu_trig.c | 1649 +++ arch/x86/math-emu/get_address.c | 401 + arch/x86/math-emu/load_store.c | 322 + arch/x86/math-emu/mul_Xsig.S | 179 + arch/x86/math-emu/poly.h | 115 + arch/x86/math-emu/poly_2xm1.c | 146 + arch/x86/math-emu/poly_atan.c | 209 + arch/x86/math-emu/poly_l2.c | 245 + arch/x86/math-emu/poly_sin.c | 379 + arch/x86/math-emu/poly_tan.c | 213 + arch/x86/math-emu/polynom_Xsig.S | 137 + arch/x86/math-emu/reg_add_sub.c | 334 + arch/x86/math-emu/reg_compare.c | 479 + arch/x86/math-emu/reg_constant.c | 118 + arch/x86/math-emu/reg_constant.h | 26 + arch/x86/math-emu/reg_convert.c | 47 + arch/x86/math-emu/reg_divide.c | 183 + arch/x86/math-emu/reg_ld_str.c | 1220 ++ arch/x86/math-emu/reg_mul.c | 116 + arch/x86/math-emu/reg_norm.S | 150 + arch/x86/math-emu/reg_round.S | 711 ++ arch/x86/math-emu/reg_u_add.S | 169 + arch/x86/math-emu/reg_u_div.S | 474 + arch/x86/math-emu/reg_u_mul.S | 150 + arch/x86/math-emu/reg_u_sub.S | 274 + arch/x86/math-emu/round_Xsig.S | 142 + arch/x86/math-emu/shr_Xsig.S | 89 + arch/x86/math-emu/status_w.h | 68 + arch/x86/math-emu/version.h | 12 + arch/x86/math-emu/wm_shrx.S | 207 + arch/x86/math-emu/wm_sqrt.S | 472 + arch/x86/mm/Makefile | 69 + arch/x86/mm/amdtopology.c | 183 + arch/x86/mm/cpu_entry_area.c | 234 + arch/x86/mm/debug_pagetables.c | 76 + arch/x86/mm/dump_pagetables.c | 471 + arch/x86/mm/extable.c | 341 + arch/x86/mm/fault.c | 1531 +++ arch/x86/mm/highmem_32.c | 33 + arch/x86/mm/hugetlbpage.c | 174 + arch/x86/mm/ident_map.c | 147 + arch/x86/mm/init.c | 1106 ++ arch/x86/mm/init_32.c | 818 ++ arch/x86/mm/init_64.c | 1705 +++ arch/x86/mm/iomap_32.c | 65 + arch/x86/mm/ioremap.c | 931 ++ arch/x86/mm/kasan_init_64.c | 433 + arch/x86/mm/kaslr.c | 181 + arch/x86/mm/kmmio.c | 622 + arch/x86/mm/kmsan_shadow.c | 20 + arch/x86/mm/maccess.c | 33 + arch/x86/mm/mem_encrypt.c | 88 + arch/x86/mm/mem_encrypt_amd.c | 540 + arch/x86/mm/mem_encrypt_boot.S | 162 + arch/x86/mm/mem_encrypt_identity.c | 618 + arch/x86/mm/mm_internal.h | 28 + arch/x86/mm/mmap.c | 250 + arch/x86/mm/mmio-mod.c | 463 + arch/x86/mm/numa.c | 1037 ++ arch/x86/mm/numa_32.c | 59 + arch/x86/mm/numa_64.c | 13 + arch/x86/mm/numa_emulation.c | 585 + arch/x86/mm/numa_internal.h | 34 + arch/x86/mm/pat/Makefile | 5 + arch/x86/mm/pat/cpa-test.c | 277 + arch/x86/mm/pat/memtype.c | 1233 ++ arch/x86/mm/pat/memtype.h | 49 + arch/x86/mm/pat/memtype_interval.c | 194 + arch/x86/mm/pat/set_memory.c | 2424 ++++ arch/x86/mm/pf_in.c | 516 + arch/x86/mm/pf_in.h | 24 + arch/x86/mm/pgprot.c | 63 + arch/x86/mm/pgtable.c | 878 ++ arch/x86/mm/pgtable_32.c | 104 + arch/x86/mm/physaddr.c | 100 + arch/x86/mm/physaddr.h | 11 + arch/x86/mm/pkeys.c | 197 + arch/x86/mm/pti.c | 666 ++ arch/x86/mm/srat.c | 114 + arch/x86/mm/testmmiotrace.c | 146 + arch/x86/mm/tlb.c | 1323 +++ arch/x86/net/Makefile | 10 + arch/x86/net/bpf_jit_comp.c | 2597 ++++ arch/x86/net/bpf_jit_comp32.c | 2624 ++++ arch/x86/pci/Makefile | 27 + arch/x86/pci/acpi.c | 512 + arch/x86/pci/amd_bus.c | 404 + arch/x86/pci/broadcom_bus.c | 112 + arch/x86/pci/bus_numa.c | 146 + arch/x86/pci/bus_numa.h | 27 + arch/x86/pci/ce4100.c | 324 + arch/x86/pci/common.c | 734 ++ arch/x86/pci/direct.c | 315 + arch/x86/pci/early.c | 59 + arch/x86/pci/fixup.c | 847 ++ arch/x86/pci/i386.c | 409 + arch/x86/pci/init.c | 51 + arch/x86/pci/intel_mid_pci.c | 406 + arch/x86/pci/irq.c | 1810 +++ arch/x86/pci/legacy.c | 77 + arch/x86/pci/mmconfig-shared.c | 815 ++ arch/x86/pci/mmconfig_32.c | 156 + arch/x86/pci/mmconfig_64.c | 154 + arch/x86/pci/numachip.c | 127 + arch/x86/pci/olpc.c | 311 + arch/x86/pci/pcbios.c | 429 + arch/x86/pci/sta2x11-fixup.c | 232 + arch/x86/pci/xen.c | 588 + arch/x86/platform/Makefile | 14 + arch/x86/platform/atom/Makefile | 2 + arch/x86/platform/atom/punit_atom_debug.c | 156 + arch/x86/platform/ce4100/Makefile | 2 + arch/x86/platform/ce4100/ce4100.c | 156 + arch/x86/platform/ce4100/falconfalls.dts | 430 + arch/x86/platform/efi/Makefile | 6 + arch/x86/platform/efi/efi.c | 901 ++ arch/x86/platform/efi/efi_32.c | 142 + arch/x86/platform/efi/efi_64.c | 863 ++ arch/x86/platform/efi/efi_stub_32.S | 60 + arch/x86/platform/efi/efi_stub_64.S | 27 + arch/x86/platform/efi/efi_thunk_64.S | 98 + arch/x86/platform/efi/quirks.c | 773 ++ arch/x86/platform/geode/Makefile | 4 + arch/x86/platform/geode/alix.c | 202 + arch/x86/platform/geode/geos.c | 125 + arch/x86/platform/geode/net5501.c | 152 + arch/x86/platform/intel-mid/Makefile | 2 + arch/x86/platform/intel-mid/intel-mid.c | 124 + arch/x86/platform/intel-mid/pwr.c | 485 + arch/x86/platform/intel-quark/Makefile | 3 + arch/x86/platform/intel-quark/imr.c | 597 + arch/x86/platform/intel-quark/imr_selftest.c | 129 + arch/x86/platform/intel/Makefile | 2 + arch/x86/platform/intel/iosf_mbi.c | 571 + arch/x86/platform/iris/Makefile | 2 + arch/x86/platform/iris/iris.c | 122 + arch/x86/platform/olpc/Makefile | 6 + arch/x86/platform/olpc/olpc-xo1-pm.c | 188 + arch/x86/platform/olpc/olpc-xo1-rtc.c | 80 + arch/x86/platform/olpc/olpc-xo1-sci.c | 629 + arch/x86/platform/olpc/olpc-xo15-sci.c | 231 + arch/x86/platform/olpc/olpc.c | 321 + arch/x86/platform/olpc/olpc_dt.c | 326 + arch/x86/platform/olpc/olpc_ofw.c | 121 + arch/x86/platform/olpc/xo1-wakeup.S | 126 + arch/x86/platform/pvh/Makefile | 5 + arch/x86/platform/pvh/enlighten.c | 137 + arch/x86/platform/pvh/head.S | 166 + arch/x86/platform/scx200/Makefile | 3 + arch/x86/platform/scx200/scx200_32.c | 130 + arch/x86/platform/ts5500/Makefile | 2 + arch/x86/platform/ts5500/ts5500.c | 341 + arch/x86/platform/uv/Makefile | 2 + arch/x86/platform/uv/bios_uv.c | 269 + arch/x86/platform/uv/uv_irq.c | 217 + arch/x86/platform/uv/uv_nmi.c | 1096 ++ arch/x86/platform/uv/uv_time.c | 393 + arch/x86/power/Makefile | 12 + arch/x86/power/cpu.c | 544 + arch/x86/power/hibernate.c | 219 + arch/x86/power/hibernate_32.c | 198 + arch/x86/power/hibernate_64.c | 142 + arch/x86/power/hibernate_asm_32.S | 112 + arch/x86/power/hibernate_asm_64.S | 150 + arch/x86/purgatory/.gitignore | 1 + arch/x86/purgatory/Makefile | 91 + arch/x86/purgatory/entry64.S | 103 + arch/x86/purgatory/kexec-purgatory.S | 14 + arch/x86/purgatory/purgatory.c | 59 + arch/x86/purgatory/setup-x86_64.S | 59 + arch/x86/purgatory/stack.S | 18 + arch/x86/ras/Kconfig | 23 + arch/x86/realmode/Makefile | 22 + arch/x86/realmode/init.c | 217 + arch/x86/realmode/rm/.gitignore | 4 + arch/x86/realmode/rm/Makefile | 80 + arch/x86/realmode/rm/bioscall.S | 1 + arch/x86/realmode/rm/copy.S | 1 + arch/x86/realmode/rm/header.S | 45 + arch/x86/realmode/rm/realmode.h | 22 + arch/x86/realmode/rm/realmode.lds.S | 77 + arch/x86/realmode/rm/reboot.S | 157 + arch/x86/realmode/rm/regs.c | 1 + arch/x86/realmode/rm/stack.S | 18 + arch/x86/realmode/rm/trampoline_32.S | 73 + arch/x86/realmode/rm/trampoline_64.S | 246 + arch/x86/realmode/rm/trampoline_common.S | 14 + arch/x86/realmode/rm/video-bios.c | 1 + arch/x86/realmode/rm/video-mode.c | 1 + arch/x86/realmode/rm/video-vesa.c | 1 + arch/x86/realmode/rm/video-vga.c | 1 + arch/x86/realmode/rm/wakemain.c | 87 + arch/x86/realmode/rm/wakeup.h | 43 + arch/x86/realmode/rm/wakeup_asm.S | 179 + arch/x86/realmode/rmpiggy.S | 19 + arch/x86/tools/.gitignore | 2 + arch/x86/tools/Makefile | 46 + arch/x86/tools/chkobjdump.awk | 34 + arch/x86/tools/gen-insn-attr-x86.awk | 441 + arch/x86/tools/insn_decoder_test.c | 174 + arch/x86/tools/insn_sanity.c | 265 + arch/x86/tools/objdump_reformat.awk | 48 + arch/x86/tools/relocs.c | 1195 ++ arch/x86/tools/relocs.h | 39 + arch/x86/tools/relocs_32.c | 18 + arch/x86/tools/relocs_64.c | 18 + arch/x86/tools/relocs_common.c | 85 + arch/x86/um/Kconfig | 49 + arch/x86/um/Makefile | 51 + arch/x86/um/asm/apic.h | 4 + arch/x86/um/asm/arch_hweight.h | 7 + arch/x86/um/asm/archparam.h | 20 + arch/x86/um/asm/barrier.h | 29 + arch/x86/um/asm/checksum.h | 119 + arch/x86/um/asm/checksum_32.h | 38 + arch/x86/um/asm/checksum_64.h | 19 + arch/x86/um/asm/desc.h | 17 + arch/x86/um/asm/elf.h | 217 + arch/x86/um/asm/irq_vectors.h | 10 + arch/x86/um/asm/mm_context.h | 72 + arch/x86/um/asm/module.h | 24 + arch/x86/um/asm/processor.h | 43 + arch/x86/um/asm/processor_32.h | 53 + arch/x86/um/asm/processor_64.h | 37 + arch/x86/um/asm/ptrace.h | 87 + arch/x86/um/asm/required-features.h | 9 + arch/x86/um/asm/segment.h | 11 + arch/x86/um/asm/syscall.h | 21 + arch/x86/um/asm/vm-flags.h | 19 + arch/x86/um/bugs_32.c | 74 + arch/x86/um/bugs_64.c | 15 + arch/x86/um/checksum_32.S | 214 + arch/x86/um/delay.c | 57 + arch/x86/um/elfcore.c | 77 + arch/x86/um/fault.c | 28 + arch/x86/um/ldt.c | 380 + arch/x86/um/mem_32.c | 50 + arch/x86/um/mem_64.c | 11 + arch/x86/um/os-Linux/Makefile | 13 + arch/x86/um/os-Linux/mcontext.c | 32 + arch/x86/um/os-Linux/prctl.c | 12 + arch/x86/um/os-Linux/registers.c | 169 + arch/x86/um/os-Linux/task_size.c | 151 + arch/x86/um/os-Linux/tls.c | 68 + arch/x86/um/ptrace_32.c | 278 + arch/x86/um/ptrace_64.c | 267 + arch/x86/um/ptrace_user.c | 21 + arch/x86/um/setjmp_32.S | 59 + arch/x86/um/setjmp_64.S | 55 + arch/x86/um/shared/sysdep/archsetjmp.h | 6 + arch/x86/um/shared/sysdep/archsetjmp_32.h | 23 + arch/x86/um/shared/sysdep/archsetjmp_64.h | 25 + arch/x86/um/shared/sysdep/faultinfo.h | 6 + arch/x86/um/shared/sysdep/faultinfo_32.h | 32 + arch/x86/um/shared/sysdep/faultinfo_64.h | 32 + arch/x86/um/shared/sysdep/kernel-offsets.h | 12 + arch/x86/um/shared/sysdep/mcontext.h | 31 + arch/x86/um/shared/sysdep/ptrace.h | 75 + arch/x86/um/shared/sysdep/ptrace_32.h | 24 + arch/x86/um/shared/sysdep/ptrace_64.h | 62 + arch/x86/um/shared/sysdep/ptrace_user.h | 28 + arch/x86/um/shared/sysdep/stub.h | 15 + arch/x86/um/shared/sysdep/stub_32.h | 116 + arch/x86/um/shared/sysdep/stub_64.h | 124 + arch/x86/um/shared/sysdep/syscalls.h | 6 + arch/x86/um/shared/sysdep/syscalls_32.h | 14 + arch/x86/um/shared/sysdep/syscalls_64.h | 28 + arch/x86/um/shared/sysdep/tls.h | 40 + arch/x86/um/signal.c | 583 + arch/x86/um/stub_32.S | 56 + arch/x86/um/stub_64.S | 50 + arch/x86/um/stub_segv.c | 20 + arch/x86/um/sys_call_table_32.c | 38 + arch/x86/um/sys_call_table_64.c | 33 + arch/x86/um/syscalls_32.c | 8 + arch/x86/um/syscalls_64.c | 100 + arch/x86/um/sysrq_32.c | 34 + arch/x86/um/sysrq_64.c | 36 + arch/x86/um/tls_32.c | 390 + arch/x86/um/tls_64.c | 18 + arch/x86/um/user-offsets.c | 84 + arch/x86/um/vdso/.gitignore | 2 + arch/x86/um/vdso/Makefile | 81 + arch/x86/um/vdso/checkundef.sh | 11 + arch/x86/um/vdso/um_vdso.c | 72 + arch/x86/um/vdso/vdso-layout.lds.S | 65 + arch/x86/um/vdso/vdso-note.S | 12 + arch/x86/um/vdso/vdso.S | 11 + arch/x86/um/vdso/vdso.lds.S | 33 + arch/x86/um/vdso/vma.c | 72 + arch/x86/video/Makefile | 2 + arch/x86/video/fbdev.c | 40 + arch/x86/virt/vmx/tdx/tdxcall.S | 96 + arch/x86/xen/Kconfig | 104 + arch/x86/xen/Makefile | 50 + arch/x86/xen/apic.c | 208 + arch/x86/xen/debugfs.c | 16 + arch/x86/xen/debugfs.h | 7 + arch/x86/xen/efi.c | 149 + arch/x86/xen/enlighten.c | 352 + arch/x86/xen/enlighten_hvm.c | 336 + arch/x86/xen/enlighten_pv.c | 1485 +++ arch/x86/xen/enlighten_pvh.c | 74 + arch/x86/xen/grant-table.c | 169 + arch/x86/xen/irq.c | 59 + arch/x86/xen/mmu.c | 53 + arch/x86/xen/mmu.h | 28 + arch/x86/xen/mmu_hvm.c | 69 + arch/x86/xen/mmu_pv.c | 2512 ++++ arch/x86/xen/multicalls.c | 214 + arch/x86/xen/multicalls.h | 69 + arch/x86/xen/p2m.c | 846 ++ arch/x86/xen/platform-pci-unplug.c | 210 + arch/x86/xen/pmu.c | 586 + arch/x86/xen/pmu.h | 22 + arch/x86/xen/setup.c | 976 ++ arch/x86/xen/smp.c | 259 + arch/x86/xen/smp.h | 43 + arch/x86/xen/smp_hvm.c | 92 + arch/x86/xen/smp_pv.c | 470 + arch/x86/xen/spinlock.c | 154 + arch/x86/xen/suspend.c | 84 + arch/x86/xen/suspend_hvm.c | 27 + arch/x86/xen/suspend_pv.c | 48 + arch/x86/xen/time.c | 617 + arch/x86/xen/trace.c | 21 + arch/x86/xen/vga.c | 78 + arch/x86/xen/xen-asm.S | 309 + arch/x86/xen/xen-head.S | 112 + arch/x86/xen/xen-ops.h | 165 + 1414 files changed, 466823 insertions(+) create mode 100644 arch/x86/.gitignore create mode 100644 arch/x86/Kbuild create mode 100644 arch/x86/Kconfig create mode 100644 arch/x86/Kconfig.assembler create mode 100644 arch/x86/Kconfig.cpu create mode 100644 arch/x86/Kconfig.debug create mode 100644 arch/x86/Makefile create mode 100644 arch/x86/Makefile.um create mode 100644 arch/x86/Makefile_32.cpu create mode 100644 arch/x86/boot/.gitignore create mode 100644 arch/x86/boot/Makefile create mode 100644 arch/x86/boot/a20.c create mode 100644 arch/x86/boot/apm.c create mode 100644 arch/x86/boot/bioscall.S create mode 100644 arch/x86/boot/bitops.h create mode 100644 arch/x86/boot/boot.h create mode 100644 arch/x86/boot/cmdline.c create mode 100644 arch/x86/boot/compressed/.gitignore create mode 100644 arch/x86/boot/compressed/Makefile create mode 100644 arch/x86/boot/compressed/acpi.c create mode 100644 arch/x86/boot/compressed/cmdline.c create mode 100644 arch/x86/boot/compressed/cpuflags.c create mode 100644 arch/x86/boot/compressed/early_serial_console.c create mode 100644 arch/x86/boot/compressed/efi.c create mode 100644 arch/x86/boot/compressed/efi.h create mode 100644 arch/x86/boot/compressed/efi_thunk_64.S create mode 100644 arch/x86/boot/compressed/error.c create mode 100644 arch/x86/boot/compressed/error.h create mode 100644 arch/x86/boot/compressed/head_32.S create mode 100644 arch/x86/boot/compressed/head_64.S create mode 100644 arch/x86/boot/compressed/ident_map_64.c create mode 100644 arch/x86/boot/compressed/idt_64.c create mode 100644 arch/x86/boot/compressed/idt_handlers_64.S create mode 100644 arch/x86/boot/compressed/kaslr.c create mode 100644 arch/x86/boot/compressed/kernel_info.S create mode 100644 arch/x86/boot/compressed/mem_encrypt.S create mode 100644 arch/x86/boot/compressed/misc.c create mode 100644 arch/x86/boot/compressed/misc.h create mode 100644 arch/x86/boot/compressed/mkpiggy.c create mode 100644 arch/x86/boot/compressed/pgtable.h create mode 100644 arch/x86/boot/compressed/pgtable_64.c create mode 100644 arch/x86/boot/compressed/sev.c create mode 100644 arch/x86/boot/compressed/string.c create mode 100644 arch/x86/boot/compressed/tdcall.S create mode 100644 arch/x86/boot/compressed/tdx.c create mode 100644 arch/x86/boot/compressed/tdx.h create mode 100644 arch/x86/boot/compressed/vmlinux.lds.S create mode 100644 arch/x86/boot/copy.S create mode 100644 arch/x86/boot/cpu.c create mode 100644 arch/x86/boot/cpucheck.c create mode 100644 arch/x86/boot/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.h create mode 100644 arch/x86/boot/ctype.h create mode 100644 arch/x86/boot/early_serial_console.c create mode 100644 arch/x86/boot/edd.c create mode 100644 arch/x86/boot/genimage.sh create mode 100644 arch/x86/boot/header.S create mode 100755 arch/x86/boot/install.sh create mode 100644 arch/x86/boot/io.h create mode 100644 arch/x86/boot/main.c create mode 100644 arch/x86/boot/memory.c create mode 100644 arch/x86/boot/mkcpustr.c create mode 100644 arch/x86/boot/msr.h create mode 100644 arch/x86/boot/mtools.conf.in create mode 100644 arch/x86/boot/pm.c create mode 100644 arch/x86/boot/pmjump.S create mode 100644 arch/x86/boot/printf.c create mode 100644 arch/x86/boot/regs.c create mode 100644 arch/x86/boot/setup.ld create mode 100644 arch/x86/boot/string.c create mode 100644 arch/x86/boot/string.h create mode 100644 arch/x86/boot/tools/.gitignore create mode 100644 arch/x86/boot/tools/build.c create mode 100644 arch/x86/boot/tty.c create mode 100644 arch/x86/boot/version.c create mode 100644 arch/x86/boot/vesa.h create mode 100644 arch/x86/boot/video-bios.c create mode 100644 arch/x86/boot/video-mode.c create mode 100644 arch/x86/boot/video-vesa.c create mode 100644 arch/x86/boot/video-vga.c create mode 100644 arch/x86/boot/video.c create mode 100644 arch/x86/boot/video.h create mode 100644 arch/x86/coco/Makefile create mode 100644 arch/x86/coco/core.c create mode 100644 arch/x86/coco/tdx/Makefile create mode 100644 arch/x86/coco/tdx/tdcall.S create mode 100644 arch/x86/coco/tdx/tdx.c create mode 100644 arch/x86/configs/i386_defconfig create mode 100644 arch/x86/configs/tiny.config create mode 100644 arch/x86/configs/x86_64_defconfig create mode 100644 arch/x86/configs/xen.config create mode 100644 arch/x86/crypto/.gitignore create mode 100644 arch/x86/crypto/Kconfig create mode 100644 arch/x86/crypto/Makefile create mode 100644 arch/x86/crypto/aegis128-aesni-asm.S create mode 100644 arch/x86/crypto/aegis128-aesni-glue.c create mode 100644 arch/x86/crypto/aes_ctrby8_avx-x86_64.S create mode 100644 arch/x86/crypto/aesni-intel_asm.S create mode 100644 arch/x86/crypto/aesni-intel_avx-x86_64.S create mode 100644 arch/x86/crypto/aesni-intel_glue.c create mode 100644 arch/x86/crypto/aria-aesni-avx-asm_64.S create mode 100644 arch/x86/crypto/aria-avx.h create mode 100644 arch/x86/crypto/aria_aesni_avx_glue.c create mode 100644 arch/x86/crypto/blake2s-core.S create mode 100644 arch/x86/crypto/blake2s-glue.c create mode 100644 arch/x86/crypto/blowfish-x86_64-asm_64.S create mode 100644 arch/x86/crypto/blowfish_glue.c create mode 100644 arch/x86/crypto/camellia-aesni-avx-asm_64.S create mode 100644 arch/x86/crypto/camellia-aesni-avx2-asm_64.S create mode 100644 arch/x86/crypto/camellia-x86_64-asm_64.S create mode 100644 arch/x86/crypto/camellia.h create mode 100644 arch/x86/crypto/camellia_aesni_avx2_glue.c create mode 100644 arch/x86/crypto/camellia_aesni_avx_glue.c create mode 100644 arch/x86/crypto/camellia_glue.c create mode 100644 arch/x86/crypto/cast5-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/cast5_avx_glue.c create mode 100644 arch/x86/crypto/cast6-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/cast6_avx_glue.c create mode 100644 arch/x86/crypto/chacha-avx2-x86_64.S create mode 100644 arch/x86/crypto/chacha-avx512vl-x86_64.S create mode 100644 arch/x86/crypto/chacha-ssse3-x86_64.S create mode 100644 arch/x86/crypto/chacha_glue.c create mode 100644 arch/x86/crypto/crc32-pclmul_asm.S create mode 100644 arch/x86/crypto/crc32-pclmul_glue.c create mode 100644 arch/x86/crypto/crc32c-intel_glue.c create mode 100644 arch/x86/crypto/crc32c-pcl-intel-asm_64.S create mode 100644 arch/x86/crypto/crct10dif-pcl-asm_64.S create mode 100644 arch/x86/crypto/crct10dif-pclmul_glue.c create mode 100644 arch/x86/crypto/curve25519-x86_64.c create mode 100644 arch/x86/crypto/des3_ede-asm_64.S create mode 100644 arch/x86/crypto/des3_ede_glue.c create mode 100644 arch/x86/crypto/ecb_cbc_helpers.h create mode 100644 arch/x86/crypto/ghash-clmulni-intel_asm.S create mode 100644 arch/x86/crypto/ghash-clmulni-intel_glue.c create mode 100644 arch/x86/crypto/glue_helper-asm-avx.S create mode 100644 arch/x86/crypto/glue_helper-asm-avx2.S create mode 100644 arch/x86/crypto/nh-avx2-x86_64.S create mode 100644 arch/x86/crypto/nh-sse2-x86_64.S create mode 100644 arch/x86/crypto/nhpoly1305-avx2-glue.c create mode 100644 arch/x86/crypto/nhpoly1305-sse2-glue.c create mode 100644 arch/x86/crypto/poly1305-x86_64-cryptogams.pl create mode 100644 arch/x86/crypto/poly1305_glue.c create mode 100644 arch/x86/crypto/polyval-clmulni_asm.S create mode 100644 arch/x86/crypto/polyval-clmulni_glue.c create mode 100644 arch/x86/crypto/serpent-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/serpent-avx.h create mode 100644 arch/x86/crypto/serpent-avx2-asm_64.S create mode 100644 arch/x86/crypto/serpent-sse2-i586-asm_32.S create mode 100644 arch/x86/crypto/serpent-sse2-x86_64-asm_64.S create mode 100644 arch/x86/crypto/serpent-sse2.h create mode 100644 arch/x86/crypto/serpent_avx2_glue.c create mode 100644 arch/x86/crypto/serpent_avx_glue.c create mode 100644 arch/x86/crypto/serpent_sse2_glue.c create mode 100644 arch/x86/crypto/sha1_avx2_x86_64_asm.S create mode 100644 arch/x86/crypto/sha1_ni_asm.S create mode 100644 arch/x86/crypto/sha1_ssse3_asm.S create mode 100644 arch/x86/crypto/sha1_ssse3_glue.c create mode 100644 arch/x86/crypto/sha256-avx-asm.S create mode 100644 arch/x86/crypto/sha256-avx2-asm.S create mode 100644 arch/x86/crypto/sha256-ssse3-asm.S create mode 100644 arch/x86/crypto/sha256_ni_asm.S create mode 100644 arch/x86/crypto/sha256_ssse3_glue.c create mode 100644 arch/x86/crypto/sha512-avx-asm.S create mode 100644 arch/x86/crypto/sha512-avx2-asm.S create mode 100644 arch/x86/crypto/sha512-ssse3-asm.S create mode 100644 arch/x86/crypto/sha512_ssse3_glue.c create mode 100644 arch/x86/crypto/sm3-avx-asm_64.S create mode 100644 arch/x86/crypto/sm3_avx_glue.c create mode 100644 arch/x86/crypto/sm4-aesni-avx-asm_64.S create mode 100644 arch/x86/crypto/sm4-aesni-avx2-asm_64.S create mode 100644 arch/x86/crypto/sm4-avx.h create mode 100644 arch/x86/crypto/sm4_aesni_avx2_glue.c create mode 100644 arch/x86/crypto/sm4_aesni_avx_glue.c create mode 100644 arch/x86/crypto/twofish-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/twofish-i586-asm_32.S create mode 100644 arch/x86/crypto/twofish-x86_64-asm_64-3way.S create mode 100644 arch/x86/crypto/twofish-x86_64-asm_64.S create mode 100644 arch/x86/crypto/twofish.h create mode 100644 arch/x86/crypto/twofish_avx_glue.c create mode 100644 arch/x86/crypto/twofish_glue.c create mode 100644 arch/x86/crypto/twofish_glue_3way.c create mode 100644 arch/x86/entry/Makefile create mode 100644 arch/x86/entry/calling.h create mode 100644 arch/x86/entry/common.c create mode 100644 arch/x86/entry/entry.S create mode 100644 arch/x86/entry/entry_32.S create mode 100644 arch/x86/entry/entry_64.S create mode 100644 arch/x86/entry/entry_64_compat.S create mode 100644 arch/x86/entry/syscall_32.c create mode 100644 arch/x86/entry/syscall_64.c create mode 100644 arch/x86/entry/syscall_x32.c create mode 100644 arch/x86/entry/syscalls/Makefile create mode 100644 arch/x86/entry/syscalls/syscall_32.tbl create mode 100644 arch/x86/entry/syscalls/syscall_64.tbl create mode 100644 arch/x86/entry/thunk_32.S create mode 100644 arch/x86/entry/thunk_64.S create mode 100644 arch/x86/entry/vdso/.gitignore create mode 100644 arch/x86/entry/vdso/Makefile create mode 100755 arch/x86/entry/vdso/checkundef.sh create mode 100644 arch/x86/entry/vdso/extable.c create mode 100644 arch/x86/entry/vdso/extable.h create mode 100644 arch/x86/entry/vdso/vclock_gettime.c create mode 100644 arch/x86/entry/vdso/vdso-layout.lds.S create mode 100644 arch/x86/entry/vdso/vdso-note.S create mode 100644 arch/x86/entry/vdso/vdso.lds.S create mode 100644 arch/x86/entry/vdso/vdso2c.c create mode 100644 arch/x86/entry/vdso/vdso2c.h create mode 100644 arch/x86/entry/vdso/vdso32-setup.c create mode 100644 arch/x86/entry/vdso/vdso32/.gitignore create mode 100644 arch/x86/entry/vdso/vdso32/note.S create mode 100644 arch/x86/entry/vdso/vdso32/sigreturn.S create mode 100644 arch/x86/entry/vdso/vdso32/system_call.S create mode 100644 arch/x86/entry/vdso/vdso32/vclock_gettime.c create mode 100644 arch/x86/entry/vdso/vdso32/vdso32.lds.S create mode 100644 arch/x86/entry/vdso/vdsox32.lds.S create mode 100644 arch/x86/entry/vdso/vgetcpu.c create mode 100644 arch/x86/entry/vdso/vma.c create mode 100644 arch/x86/entry/vdso/vsgx.S create mode 100644 arch/x86/entry/vsyscall/Makefile create mode 100644 arch/x86/entry/vsyscall/vsyscall_64.c create mode 100644 arch/x86/entry/vsyscall/vsyscall_emu_64.S create mode 100644 arch/x86/entry/vsyscall/vsyscall_trace.h create mode 100644 arch/x86/events/Kconfig create mode 100644 arch/x86/events/Makefile create mode 100644 arch/x86/events/amd/Makefile create mode 100644 arch/x86/events/amd/brs.c create mode 100644 arch/x86/events/amd/core.c create mode 100644 arch/x86/events/amd/ibs.c create mode 100644 arch/x86/events/amd/iommu.c create mode 100644 arch/x86/events/amd/iommu.h create mode 100644 arch/x86/events/amd/lbr.c create mode 100644 arch/x86/events/amd/power.c create mode 100644 arch/x86/events/amd/uncore.c create mode 100644 arch/x86/events/core.c create mode 100644 arch/x86/events/intel/Makefile create mode 100644 arch/x86/events/intel/bts.c create mode 100644 arch/x86/events/intel/core.c create mode 100644 arch/x86/events/intel/cstate.c create mode 100644 arch/x86/events/intel/ds.c create mode 100644 arch/x86/events/intel/knc.c create mode 100644 arch/x86/events/intel/lbr.c create mode 100644 arch/x86/events/intel/p4.c create mode 100644 arch/x86/events/intel/p6.c create mode 100644 arch/x86/events/intel/pt.c create mode 100644 arch/x86/events/intel/pt.h create mode 100644 arch/x86/events/intel/uncore.c create mode 100644 arch/x86/events/intel/uncore.h create mode 100644 arch/x86/events/intel/uncore_discovery.c create mode 100644 arch/x86/events/intel/uncore_discovery.h create mode 100644 arch/x86/events/intel/uncore_nhmex.c create mode 100644 arch/x86/events/intel/uncore_snb.c create mode 100644 arch/x86/events/intel/uncore_snbep.c create mode 100644 arch/x86/events/msr.c create mode 100644 arch/x86/events/perf_event.h create mode 100644 arch/x86/events/perf_event_flags.h create mode 100644 arch/x86/events/probe.c create mode 100644 arch/x86/events/probe.h create mode 100644 arch/x86/events/rapl.c create mode 100644 arch/x86/events/utils.c create mode 100644 arch/x86/events/zhaoxin/Makefile create mode 100644 arch/x86/events/zhaoxin/core.c create mode 100644 arch/x86/hyperv/Makefile create mode 100644 arch/x86/hyperv/hv_apic.c create mode 100644 arch/x86/hyperv/hv_init.c create mode 100644 arch/x86/hyperv/hv_proc.c create mode 100644 arch/x86/hyperv/hv_spinlock.c create mode 100644 arch/x86/hyperv/irqdomain.c create mode 100644 arch/x86/hyperv/ivm.c create mode 100644 arch/x86/hyperv/mmu.c create mode 100644 arch/x86/hyperv/nested.c create mode 100644 arch/x86/ia32/Makefile create mode 100644 arch/x86/ia32/audit.c create mode 100644 arch/x86/ia32/ia32_signal.c create mode 100644 arch/x86/include/asm/GEN-for-each-reg.h create mode 100644 arch/x86/include/asm/Kbuild create mode 100644 arch/x86/include/asm/acenv.h create mode 100644 arch/x86/include/asm/acpi.h create mode 100644 arch/x86/include/asm/acrn.h create mode 100644 arch/x86/include/asm/agp.h create mode 100644 arch/x86/include/asm/alternative.h create mode 100644 arch/x86/include/asm/amd-ibs.h create mode 100644 arch/x86/include/asm/amd_hsmp.h create mode 100644 arch/x86/include/asm/amd_nb.h create mode 100644 arch/x86/include/asm/apic.h create mode 100644 arch/x86/include/asm/apicdef.h create mode 100644 arch/x86/include/asm/apm.h create mode 100644 arch/x86/include/asm/arch_hweight.h create mode 100644 arch/x86/include/asm/archrandom.h create mode 100644 arch/x86/include/asm/asm-offsets.h create mode 100644 arch/x86/include/asm/asm-prototypes.h create mode 100644 arch/x86/include/asm/asm.h create mode 100644 arch/x86/include/asm/atomic.h create mode 100644 arch/x86/include/asm/atomic64_32.h create mode 100644 arch/x86/include/asm/atomic64_64.h create mode 100644 arch/x86/include/asm/audit.h create mode 100644 arch/x86/include/asm/barrier.h create mode 100644 arch/x86/include/asm/bios_ebda.h create mode 100644 arch/x86/include/asm/bitops.h create mode 100644 arch/x86/include/asm/boot.h create mode 100644 arch/x86/include/asm/bootparam_utils.h create mode 100644 arch/x86/include/asm/bug.h create mode 100644 arch/x86/include/asm/bugs.h create mode 100644 arch/x86/include/asm/cache.h create mode 100644 arch/x86/include/asm/cacheflush.h create mode 100644 arch/x86/include/asm/cacheinfo.h create mode 100644 arch/x86/include/asm/ce4100.h create mode 100644 arch/x86/include/asm/cfi.h create mode 100644 arch/x86/include/asm/checksum.h create mode 100644 arch/x86/include/asm/checksum_32.h create mode 100644 arch/x86/include/asm/checksum_64.h create mode 100644 arch/x86/include/asm/clocksource.h create mode 100644 arch/x86/include/asm/cmdline.h create mode 100644 arch/x86/include/asm/cmpxchg.h create mode 100644 arch/x86/include/asm/cmpxchg_32.h create mode 100644 arch/x86/include/asm/cmpxchg_64.h create mode 100644 arch/x86/include/asm/coco.h create mode 100644 arch/x86/include/asm/compat.h create mode 100644 arch/x86/include/asm/cpu.h create mode 100644 arch/x86/include/asm/cpu_device_id.h create mode 100644 arch/x86/include/asm/cpu_entry_area.h create mode 100644 arch/x86/include/asm/cpufeature.h create mode 100644 arch/x86/include/asm/cpufeatures.h create mode 100644 arch/x86/include/asm/cpuid.h create mode 100644 arch/x86/include/asm/cpuidle_haltpoll.h create mode 100644 arch/x86/include/asm/cpumask.h create mode 100644 arch/x86/include/asm/crash.h create mode 100644 arch/x86/include/asm/current.h create mode 100644 arch/x86/include/asm/debugreg.h create mode 100644 arch/x86/include/asm/delay.h create mode 100644 arch/x86/include/asm/desc.h create mode 100644 arch/x86/include/asm/desc_defs.h create mode 100644 arch/x86/include/asm/device.h create mode 100644 arch/x86/include/asm/disabled-features.h create mode 100644 arch/x86/include/asm/div64.h create mode 100644 arch/x86/include/asm/dma-mapping.h create mode 100644 arch/x86/include/asm/dma.h create mode 100644 arch/x86/include/asm/dmi.h create mode 100644 arch/x86/include/asm/doublefault.h create mode 100644 arch/x86/include/asm/dwarf2.h create mode 100644 arch/x86/include/asm/e820/api.h create mode 100644 arch/x86/include/asm/e820/types.h create mode 100644 arch/x86/include/asm/edac.h create mode 100644 arch/x86/include/asm/efi.h create mode 100644 arch/x86/include/asm/elf.h create mode 100644 arch/x86/include/asm/elfcore-compat.h create mode 100644 arch/x86/include/asm/emergency-restart.h create mode 100644 arch/x86/include/asm/emulate_prefix.h create mode 100644 arch/x86/include/asm/enclu.h create mode 100644 arch/x86/include/asm/entry-common.h create mode 100644 arch/x86/include/asm/espfix.h create mode 100644 arch/x86/include/asm/exec.h create mode 100644 arch/x86/include/asm/extable.h create mode 100644 arch/x86/include/asm/extable_fixup_types.h create mode 100644 arch/x86/include/asm/fb.h create mode 100644 arch/x86/include/asm/fixmap.h create mode 100644 arch/x86/include/asm/floppy.h create mode 100644 arch/x86/include/asm/fpu/api.h create mode 100644 arch/x86/include/asm/fpu/regset.h create mode 100644 arch/x86/include/asm/fpu/sched.h create mode 100644 arch/x86/include/asm/fpu/signal.h create mode 100644 arch/x86/include/asm/fpu/types.h create mode 100644 arch/x86/include/asm/fpu/xcr.h create mode 100644 arch/x86/include/asm/fpu/xstate.h create mode 100644 arch/x86/include/asm/frame.h create mode 100644 arch/x86/include/asm/fsgsbase.h create mode 100644 arch/x86/include/asm/ftrace.h create mode 100644 arch/x86/include/asm/futex.h create mode 100644 arch/x86/include/asm/gart.h create mode 100644 arch/x86/include/asm/genapic.h create mode 100644 arch/x86/include/asm/geode.h create mode 100644 arch/x86/include/asm/hardirq.h create mode 100644 arch/x86/include/asm/highmem.h create mode 100644 arch/x86/include/asm/hpet.h create mode 100644 arch/x86/include/asm/hugetlb.h create mode 100644 arch/x86/include/asm/hw_breakpoint.h create mode 100644 arch/x86/include/asm/hw_irq.h create mode 100644 arch/x86/include/asm/hyperv-tlfs.h create mode 100644 arch/x86/include/asm/hypervisor.h create mode 100644 arch/x86/include/asm/i8259.h create mode 100644 arch/x86/include/asm/ia32.h create mode 100644 arch/x86/include/asm/ia32_unistd.h create mode 100644 arch/x86/include/asm/ibt.h create mode 100644 arch/x86/include/asm/idtentry.h create mode 100644 arch/x86/include/asm/imr.h create mode 100644 arch/x86/include/asm/inat.h create mode 100644 arch/x86/include/asm/inat_types.h create mode 100644 arch/x86/include/asm/init.h create mode 100644 arch/x86/include/asm/insn-eval.h create mode 100644 arch/x86/include/asm/insn.h create mode 100644 arch/x86/include/asm/inst.h create mode 100644 arch/x86/include/asm/intel-family.h create mode 100644 arch/x86/include/asm/intel-mid.h create mode 100644 arch/x86/include/asm/intel_ds.h create mode 100644 arch/x86/include/asm/intel_pconfig.h create mode 100644 arch/x86/include/asm/intel_pt.h create mode 100644 arch/x86/include/asm/intel_punit_ipc.h create mode 100644 arch/x86/include/asm/intel_scu_ipc.h create mode 100644 arch/x86/include/asm/intel_telemetry.h create mode 100644 arch/x86/include/asm/invpcid.h create mode 100644 arch/x86/include/asm/io.h create mode 100644 arch/x86/include/asm/io_apic.h create mode 100644 arch/x86/include/asm/io_bitmap.h create mode 100644 arch/x86/include/asm/iomap.h create mode 100644 arch/x86/include/asm/iommu.h create mode 100644 arch/x86/include/asm/iosf_mbi.h create mode 100644 arch/x86/include/asm/irq.h create mode 100644 arch/x86/include/asm/irq_remapping.h create mode 100644 arch/x86/include/asm/irq_stack.h create mode 100644 arch/x86/include/asm/irq_vectors.h create mode 100644 arch/x86/include/asm/irq_work.h create mode 100644 arch/x86/include/asm/irqdomain.h create mode 100644 arch/x86/include/asm/irqflags.h create mode 100644 arch/x86/include/asm/ist.h create mode 100644 arch/x86/include/asm/jailhouse_para.h create mode 100644 arch/x86/include/asm/jump_label.h create mode 100644 arch/x86/include/asm/kasan.h create mode 100644 arch/x86/include/asm/kaslr.h create mode 100644 arch/x86/include/asm/kbdleds.h create mode 100644 arch/x86/include/asm/kdebug.h create mode 100644 arch/x86/include/asm/kexec-bzimage64.h create mode 100644 arch/x86/include/asm/kexec.h create mode 100644 arch/x86/include/asm/kfence.h create mode 100644 arch/x86/include/asm/kgdb.h create mode 100644 arch/x86/include/asm/kmsan.h create mode 100644 arch/x86/include/asm/kprobes.h create mode 100644 arch/x86/include/asm/kvm-x86-ops.h create mode 100644 arch/x86/include/asm/kvm-x86-pmu-ops.h create mode 100644 arch/x86/include/asm/kvm_host.h create mode 100644 arch/x86/include/asm/kvm_page_track.h create mode 100644 arch/x86/include/asm/kvm_para.h create mode 100644 arch/x86/include/asm/kvm_types.h create mode 100644 arch/x86/include/asm/kvm_vcpu_regs.h create mode 100644 arch/x86/include/asm/kvmclock.h create mode 100644 arch/x86/include/asm/linkage.h create mode 100644 arch/x86/include/asm/local.h create mode 100644 arch/x86/include/asm/mach_timer.h create mode 100644 arch/x86/include/asm/mach_traps.h create mode 100644 arch/x86/include/asm/math_emu.h create mode 100644 arch/x86/include/asm/mc146818rtc.h create mode 100644 arch/x86/include/asm/mce.h create mode 100644 arch/x86/include/asm/mem_encrypt.h create mode 100644 arch/x86/include/asm/memtype.h create mode 100644 arch/x86/include/asm/microcode.h create mode 100644 arch/x86/include/asm/microcode_amd.h create mode 100644 arch/x86/include/asm/microcode_intel.h create mode 100644 arch/x86/include/asm/misc.h create mode 100644 arch/x86/include/asm/mmconfig.h create mode 100644 arch/x86/include/asm/mmu.h create mode 100644 arch/x86/include/asm/mmu_context.h create mode 100644 arch/x86/include/asm/mmzone.h create mode 100644 arch/x86/include/asm/mmzone_32.h create mode 100644 arch/x86/include/asm/mmzone_64.h create mode 100644 arch/x86/include/asm/module.h create mode 100644 arch/x86/include/asm/mpspec.h create mode 100644 arch/x86/include/asm/mpspec_def.h create mode 100644 arch/x86/include/asm/mshyperv.h create mode 100644 arch/x86/include/asm/msi.h create mode 100644 arch/x86/include/asm/msr-index.h create mode 100644 arch/x86/include/asm/msr-trace.h create mode 100644 arch/x86/include/asm/msr.h create mode 100644 arch/x86/include/asm/mtrr.h create mode 100644 arch/x86/include/asm/mwait.h create mode 100644 arch/x86/include/asm/nmi.h create mode 100644 arch/x86/include/asm/nops.h create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/include/asm/numa.h create mode 100644 arch/x86/include/asm/numa_32.h create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/include/asm/numachip/numachip_csr.h create mode 100644 arch/x86/include/asm/olpc.h create mode 100644 arch/x86/include/asm/olpc_ofw.h create mode 100644 arch/x86/include/asm/orc_lookup.h create mode 100644 arch/x86/include/asm/orc_types.h create mode 100644 arch/x86/include/asm/page.h create mode 100644 arch/x86/include/asm/page_32.h create mode 100644 arch/x86/include/asm/page_32_types.h create mode 100644 arch/x86/include/asm/page_64.h create mode 100644 arch/x86/include/asm/page_64_types.h create mode 100644 arch/x86/include/asm/page_types.h create mode 100644 arch/x86/include/asm/paravirt.h create mode 100644 arch/x86/include/asm/paravirt_api_clock.h create mode 100644 arch/x86/include/asm/paravirt_types.h create mode 100644 arch/x86/include/asm/parport.h create mode 100644 arch/x86/include/asm/pc-conf-reg.h create mode 100644 arch/x86/include/asm/pci-direct.h create mode 100644 arch/x86/include/asm/pci-functions.h create mode 100644 arch/x86/include/asm/pci.h create mode 100644 arch/x86/include/asm/pci_x86.h create mode 100644 arch/x86/include/asm/percpu.h create mode 100644 arch/x86/include/asm/perf_event.h create mode 100644 arch/x86/include/asm/perf_event_p4.h create mode 100644 arch/x86/include/asm/pgalloc.h create mode 100644 arch/x86/include/asm/pgtable-2level.h create mode 100644 arch/x86/include/asm/pgtable-2level_types.h create mode 100644 arch/x86/include/asm/pgtable-3level.h create mode 100644 arch/x86/include/asm/pgtable-3level_types.h create mode 100644 arch/x86/include/asm/pgtable-invert.h create mode 100644 arch/x86/include/asm/pgtable.h create mode 100644 arch/x86/include/asm/pgtable_32.h create mode 100644 arch/x86/include/asm/pgtable_32_areas.h create mode 100644 arch/x86/include/asm/pgtable_32_types.h create mode 100644 arch/x86/include/asm/pgtable_64.h create mode 100644 arch/x86/include/asm/pgtable_64_types.h create mode 100644 arch/x86/include/asm/pgtable_areas.h create mode 100644 arch/x86/include/asm/pgtable_types.h create mode 100644 arch/x86/include/asm/pkeys.h create mode 100644 arch/x86/include/asm/pkru.h create mode 100644 arch/x86/include/asm/platform_sst_audio.h create mode 100644 arch/x86/include/asm/pm-trace.h create mode 100644 arch/x86/include/asm/posix_types.h create mode 100644 arch/x86/include/asm/preempt.h create mode 100644 arch/x86/include/asm/probe_roms.h create mode 100644 arch/x86/include/asm/processor-cyrix.h create mode 100644 arch/x86/include/asm/processor-flags.h create mode 100644 arch/x86/include/asm/processor.h create mode 100644 arch/x86/include/asm/prom.h create mode 100644 arch/x86/include/asm/proto.h create mode 100644 arch/x86/include/asm/pti.h create mode 100644 arch/x86/include/asm/ptrace.h create mode 100644 arch/x86/include/asm/purgatory.h create mode 100644 arch/x86/include/asm/pvclock-abi.h create mode 100644 arch/x86/include/asm/pvclock.h create mode 100644 arch/x86/include/asm/qrwlock.h create mode 100644 arch/x86/include/asm/qspinlock.h create mode 100644 arch/x86/include/asm/qspinlock_paravirt.h create mode 100644 arch/x86/include/asm/realmode.h create mode 100644 arch/x86/include/asm/reboot.h create mode 100644 arch/x86/include/asm/reboot_fixups.h create mode 100644 arch/x86/include/asm/required-features.h create mode 100644 arch/x86/include/asm/resctrl.h create mode 100644 arch/x86/include/asm/rmwcc.h create mode 100644 arch/x86/include/asm/seccomp.h create mode 100644 arch/x86/include/asm/sections.h create mode 100644 arch/x86/include/asm/segment.h create mode 100644 arch/x86/include/asm/serial.h create mode 100644 arch/x86/include/asm/set_memory.h create mode 100644 arch/x86/include/asm/setup.h create mode 100644 arch/x86/include/asm/setup_arch.h create mode 100644 arch/x86/include/asm/sev-common.h create mode 100644 arch/x86/include/asm/sev.h create mode 100644 arch/x86/include/asm/sgx.h create mode 100644 arch/x86/include/asm/shared/io.h create mode 100644 arch/x86/include/asm/shared/msr.h create mode 100644 arch/x86/include/asm/shared/tdx.h create mode 100644 arch/x86/include/asm/shmparam.h create mode 100644 arch/x86/include/asm/sigcontext.h create mode 100644 arch/x86/include/asm/sigframe.h create mode 100644 arch/x86/include/asm/sighandling.h create mode 100644 arch/x86/include/asm/signal.h create mode 100644 arch/x86/include/asm/simd.h create mode 100644 arch/x86/include/asm/smap.h create mode 100644 arch/x86/include/asm/smp.h create mode 100644 arch/x86/include/asm/softirq_stack.h create mode 100644 arch/x86/include/asm/sparsemem.h create mode 100644 arch/x86/include/asm/spec-ctrl.h create mode 100644 arch/x86/include/asm/special_insns.h create mode 100644 arch/x86/include/asm/spinlock.h create mode 100644 arch/x86/include/asm/spinlock_types.h create mode 100644 arch/x86/include/asm/sta2x11.h create mode 100644 arch/x86/include/asm/stackprotector.h create mode 100644 arch/x86/include/asm/stacktrace.h create mode 100644 arch/x86/include/asm/static_call.h create mode 100644 arch/x86/include/asm/string.h create mode 100644 arch/x86/include/asm/string_32.h create mode 100644 arch/x86/include/asm/string_64.h create mode 100644 arch/x86/include/asm/suspend.h create mode 100644 arch/x86/include/asm/suspend_32.h create mode 100644 arch/x86/include/asm/suspend_64.h create mode 100644 arch/x86/include/asm/svm.h create mode 100644 arch/x86/include/asm/switch_to.h create mode 100644 arch/x86/include/asm/sync_bitops.h create mode 100644 arch/x86/include/asm/sync_core.h create mode 100644 arch/x86/include/asm/syscall.h create mode 100644 arch/x86/include/asm/syscall_wrapper.h create mode 100644 arch/x86/include/asm/syscalls.h create mode 100644 arch/x86/include/asm/tdx.h create mode 100644 arch/x86/include/asm/text-patching.h create mode 100644 arch/x86/include/asm/thermal.h create mode 100644 arch/x86/include/asm/thread_info.h create mode 100644 arch/x86/include/asm/time.h create mode 100644 arch/x86/include/asm/timer.h create mode 100644 arch/x86/include/asm/timex.h create mode 100644 arch/x86/include/asm/tlb.h create mode 100644 arch/x86/include/asm/tlbbatch.h create mode 100644 arch/x86/include/asm/tlbflush.h create mode 100644 arch/x86/include/asm/topology.h create mode 100644 arch/x86/include/asm/trace/common.h create mode 100644 arch/x86/include/asm/trace/exceptions.h create mode 100644 arch/x86/include/asm/trace/fpu.h create mode 100644 arch/x86/include/asm/trace/hyperv.h create mode 100644 arch/x86/include/asm/trace/irq_vectors.h create mode 100644 arch/x86/include/asm/trace_clock.h create mode 100644 arch/x86/include/asm/trap_pf.h create mode 100644 arch/x86/include/asm/trapnr.h create mode 100644 arch/x86/include/asm/traps.h create mode 100644 arch/x86/include/asm/tsc.h create mode 100644 arch/x86/include/asm/uaccess.h create mode 100644 arch/x86/include/asm/uaccess_32.h create mode 100644 arch/x86/include/asm/uaccess_64.h create mode 100644 arch/x86/include/asm/umip.h create mode 100644 arch/x86/include/asm/unistd.h create mode 100644 arch/x86/include/asm/unwind.h create mode 100644 arch/x86/include/asm/unwind_hints.h create mode 100644 arch/x86/include/asm/uprobes.h create mode 100644 arch/x86/include/asm/user.h create mode 100644 arch/x86/include/asm/user32.h create mode 100644 arch/x86/include/asm/user_32.h create mode 100644 arch/x86/include/asm/user_64.h create mode 100644 arch/x86/include/asm/uv/bios.h create mode 100644 arch/x86/include/asm/uv/uv.h create mode 100644 arch/x86/include/asm/uv/uv_geo.h create mode 100644 arch/x86/include/asm/uv/uv_hub.h create mode 100644 arch/x86/include/asm/uv/uv_irq.h create mode 100644 arch/x86/include/asm/uv/uv_mmrs.h create mode 100644 arch/x86/include/asm/vdso.h create mode 100644 arch/x86/include/asm/vdso/clocksource.h create mode 100644 arch/x86/include/asm/vdso/gettimeofday.h create mode 100644 arch/x86/include/asm/vdso/processor.h create mode 100644 arch/x86/include/asm/vdso/vsyscall.h create mode 100644 arch/x86/include/asm/vermagic.h create mode 100644 arch/x86/include/asm/vga.h create mode 100644 arch/x86/include/asm/vgtod.h create mode 100644 arch/x86/include/asm/virtext.h create mode 100644 arch/x86/include/asm/vm86.h create mode 100644 arch/x86/include/asm/vmalloc.h create mode 100644 arch/x86/include/asm/vmware.h create mode 100644 arch/x86/include/asm/vmx.h create mode 100644 arch/x86/include/asm/vmxfeatures.h create mode 100644 arch/x86/include/asm/vsyscall.h create mode 100644 arch/x86/include/asm/vvar.h create mode 100644 arch/x86/include/asm/word-at-a-time.h create mode 100644 arch/x86/include/asm/x86_init.h create mode 100644 arch/x86/include/asm/xen/cpuid.h create mode 100644 arch/x86/include/asm/xen/events.h create mode 100644 arch/x86/include/asm/xen/hypercall.h create mode 100644 arch/x86/include/asm/xen/hypervisor.h create mode 100644 arch/x86/include/asm/xen/interface.h create mode 100644 arch/x86/include/asm/xen/interface_32.h create mode 100644 arch/x86/include/asm/xen/interface_64.h create mode 100644 arch/x86/include/asm/xen/page.h create mode 100644 arch/x86/include/asm/xen/pci.h create mode 100644 arch/x86/include/asm/xen/swiotlb-xen.h create mode 100644 arch/x86/include/asm/xen/trace_types.h create mode 100644 arch/x86/include/asm/xor.h create mode 100644 arch/x86/include/asm/xor_32.h create mode 100644 arch/x86/include/asm/xor_64.h create mode 100644 arch/x86/include/asm/xor_avx.h create mode 100644 arch/x86/include/uapi/asm/Kbuild create mode 100644 arch/x86/include/uapi/asm/a.out.h create mode 100644 arch/x86/include/uapi/asm/amd_hsmp.h create mode 100644 arch/x86/include/uapi/asm/auxvec.h create mode 100644 arch/x86/include/uapi/asm/bitsperlong.h create mode 100644 arch/x86/include/uapi/asm/boot.h create mode 100644 arch/x86/include/uapi/asm/bootparam.h create mode 100644 arch/x86/include/uapi/asm/byteorder.h create mode 100644 arch/x86/include/uapi/asm/debugreg.h create mode 100644 arch/x86/include/uapi/asm/e820.h create mode 100644 arch/x86/include/uapi/asm/hw_breakpoint.h create mode 100644 arch/x86/include/uapi/asm/hwcap2.h create mode 100644 arch/x86/include/uapi/asm/ist.h create mode 100644 arch/x86/include/uapi/asm/kvm.h create mode 100644 arch/x86/include/uapi/asm/kvm_para.h create mode 100644 arch/x86/include/uapi/asm/kvm_perf.h create mode 100644 arch/x86/include/uapi/asm/ldt.h create mode 100644 arch/x86/include/uapi/asm/mce.h create mode 100644 arch/x86/include/uapi/asm/mman.h create mode 100644 arch/x86/include/uapi/asm/msgbuf.h create mode 100644 arch/x86/include/uapi/asm/msr.h create mode 100644 arch/x86/include/uapi/asm/mtrr.h create mode 100644 arch/x86/include/uapi/asm/perf_regs.h create mode 100644 arch/x86/include/uapi/asm/posix_types.h create mode 100644 arch/x86/include/uapi/asm/posix_types_32.h create mode 100644 arch/x86/include/uapi/asm/posix_types_64.h create mode 100644 arch/x86/include/uapi/asm/posix_types_x32.h create mode 100644 arch/x86/include/uapi/asm/prctl.h create mode 100644 arch/x86/include/uapi/asm/processor-flags.h create mode 100644 arch/x86/include/uapi/asm/ptrace-abi.h create mode 100644 arch/x86/include/uapi/asm/ptrace.h create mode 100644 arch/x86/include/uapi/asm/sembuf.h create mode 100644 arch/x86/include/uapi/asm/setup.h create mode 100644 arch/x86/include/uapi/asm/sgx.h create mode 100644 arch/x86/include/uapi/asm/shmbuf.h create mode 100644 arch/x86/include/uapi/asm/sigcontext.h create mode 100644 arch/x86/include/uapi/asm/sigcontext32.h create mode 100644 arch/x86/include/uapi/asm/siginfo.h create mode 100644 arch/x86/include/uapi/asm/signal.h create mode 100644 arch/x86/include/uapi/asm/stat.h create mode 100644 arch/x86/include/uapi/asm/statfs.h create mode 100644 arch/x86/include/uapi/asm/svm.h create mode 100644 arch/x86/include/uapi/asm/swab.h create mode 100644 arch/x86/include/uapi/asm/ucontext.h create mode 100644 arch/x86/include/uapi/asm/unistd.h create mode 100644 arch/x86/include/uapi/asm/vm86.h create mode 100644 arch/x86/include/uapi/asm/vmx.h create mode 100644 arch/x86/include/uapi/asm/vsyscall.h create mode 100644 arch/x86/kernel/.gitignore create mode 100644 arch/x86/kernel/Makefile create mode 100644 arch/x86/kernel/acpi/Makefile create mode 100644 arch/x86/kernel/acpi/apei.c create mode 100644 arch/x86/kernel/acpi/boot.c create mode 100644 arch/x86/kernel/acpi/cppc.c create mode 100644 arch/x86/kernel/acpi/cstate.c create mode 100644 arch/x86/kernel/acpi/sleep.c create mode 100644 arch/x86/kernel/acpi/sleep.h create mode 100644 arch/x86/kernel/acpi/wakeup_32.S create mode 100644 arch/x86/kernel/acpi/wakeup_64.S create mode 100644 arch/x86/kernel/alternative.c create mode 100644 arch/x86/kernel/amd_gart_64.c create mode 100644 arch/x86/kernel/amd_nb.c create mode 100644 arch/x86/kernel/aperture_64.c create mode 100644 arch/x86/kernel/apic/Makefile create mode 100644 arch/x86/kernel/apic/apic.c create mode 100644 arch/x86/kernel/apic/apic_common.c create mode 100644 arch/x86/kernel/apic/apic_flat_64.c create mode 100644 arch/x86/kernel/apic/apic_noop.c create mode 100644 arch/x86/kernel/apic/apic_numachip.c create mode 100644 arch/x86/kernel/apic/bigsmp_32.c create mode 100644 arch/x86/kernel/apic/hw_nmi.c create mode 100644 arch/x86/kernel/apic/io_apic.c create mode 100644 arch/x86/kernel/apic/ipi.c create mode 100644 arch/x86/kernel/apic/local.h create mode 100644 arch/x86/kernel/apic/msi.c create mode 100644 arch/x86/kernel/apic/probe_32.c create mode 100644 arch/x86/kernel/apic/probe_64.c create mode 100644 arch/x86/kernel/apic/vector.c create mode 100644 arch/x86/kernel/apic/x2apic_cluster.c create mode 100644 arch/x86/kernel/apic/x2apic_phys.c create mode 100644 arch/x86/kernel/apic/x2apic_uv_x.c create mode 100644 arch/x86/kernel/apm_32.c create mode 100644 arch/x86/kernel/asm-offsets.c create mode 100644 arch/x86/kernel/asm-offsets_32.c create mode 100644 arch/x86/kernel/asm-offsets_64.c create mode 100644 arch/x86/kernel/audit_64.c create mode 100644 arch/x86/kernel/bootflag.c create mode 100644 arch/x86/kernel/cfi.c create mode 100644 arch/x86/kernel/check.c create mode 100644 arch/x86/kernel/cpu/.gitignore create mode 100644 arch/x86/kernel/cpu/Makefile create mode 100644 arch/x86/kernel/cpu/acrn.c create mode 100644 arch/x86/kernel/cpu/amd.c create mode 100644 arch/x86/kernel/cpu/aperfmperf.c create mode 100644 arch/x86/kernel/cpu/bugs.c create mode 100644 arch/x86/kernel/cpu/cacheinfo.c create mode 100644 arch/x86/kernel/cpu/centaur.c create mode 100644 arch/x86/kernel/cpu/common.c create mode 100644 arch/x86/kernel/cpu/cpu.h create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c create mode 100644 arch/x86/kernel/cpu/cyrix.c create mode 100644 arch/x86/kernel/cpu/feat_ctl.c create mode 100644 arch/x86/kernel/cpu/hygon.c create mode 100644 arch/x86/kernel/cpu/hypervisor.c create mode 100644 arch/x86/kernel/cpu/intel.c create mode 100644 arch/x86/kernel/cpu/intel_epb.c create mode 100644 arch/x86/kernel/cpu/intel_pconfig.c create mode 100644 arch/x86/kernel/cpu/match.c create mode 100644 arch/x86/kernel/cpu/mce/Makefile create mode 100644 arch/x86/kernel/cpu/mce/amd.c create mode 100644 arch/x86/kernel/cpu/mce/apei.c create mode 100644 arch/x86/kernel/cpu/mce/core.c create mode 100644 arch/x86/kernel/cpu/mce/dev-mcelog.c create mode 100644 arch/x86/kernel/cpu/mce/genpool.c create mode 100644 arch/x86/kernel/cpu/mce/inject.c create mode 100644 arch/x86/kernel/cpu/mce/intel.c create mode 100644 arch/x86/kernel/cpu/mce/internal.h create mode 100644 arch/x86/kernel/cpu/mce/p5.c create mode 100644 arch/x86/kernel/cpu/mce/severity.c create mode 100644 arch/x86/kernel/cpu/mce/threshold.c create mode 100644 arch/x86/kernel/cpu/mce/winchip.c create mode 100644 arch/x86/kernel/cpu/microcode/Makefile create mode 100644 arch/x86/kernel/cpu/microcode/amd.c create mode 100644 arch/x86/kernel/cpu/microcode/core.c create mode 100644 arch/x86/kernel/cpu/microcode/intel.c create mode 100644 arch/x86/kernel/cpu/mkcapflags.sh create mode 100644 arch/x86/kernel/cpu/mshyperv.c create mode 100644 arch/x86/kernel/cpu/mtrr/Makefile create mode 100644 arch/x86/kernel/cpu/mtrr/amd.c create mode 100644 arch/x86/kernel/cpu/mtrr/centaur.c create mode 100644 arch/x86/kernel/cpu/mtrr/cleanup.c create mode 100644 arch/x86/kernel/cpu/mtrr/cyrix.c create mode 100644 arch/x86/kernel/cpu/mtrr/generic.c create mode 100644 arch/x86/kernel/cpu/mtrr/if.c create mode 100644 arch/x86/kernel/cpu/mtrr/mtrr.c create mode 100644 arch/x86/kernel/cpu/mtrr/mtrr.h create mode 100644 arch/x86/kernel/cpu/perfctr-watchdog.c create mode 100644 arch/x86/kernel/cpu/powerflags.c create mode 100644 arch/x86/kernel/cpu/proc.c create mode 100644 arch/x86/kernel/cpu/rdrand.c create mode 100644 arch/x86/kernel/cpu/resctrl/Makefile create mode 100644 arch/x86/kernel/cpu/resctrl/core.c create mode 100644 arch/x86/kernel/cpu/resctrl/ctrlmondata.c create mode 100644 arch/x86/kernel/cpu/resctrl/internal.h create mode 100644 arch/x86/kernel/cpu/resctrl/monitor.c create mode 100644 arch/x86/kernel/cpu/resctrl/pseudo_lock.c create mode 100644 arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h create mode 100644 arch/x86/kernel/cpu/resctrl/rdtgroup.c create mode 100644 arch/x86/kernel/cpu/scattered.c create mode 100644 arch/x86/kernel/cpu/sgx/Makefile create mode 100644 arch/x86/kernel/cpu/sgx/driver.c create mode 100644 arch/x86/kernel/cpu/sgx/driver.h create mode 100644 arch/x86/kernel/cpu/sgx/encl.c create mode 100644 arch/x86/kernel/cpu/sgx/encl.h create mode 100644 arch/x86/kernel/cpu/sgx/encls.h create mode 100644 arch/x86/kernel/cpu/sgx/ioctl.c create mode 100644 arch/x86/kernel/cpu/sgx/main.c create mode 100644 arch/x86/kernel/cpu/sgx/sgx.h create mode 100644 arch/x86/kernel/cpu/sgx/virt.c create mode 100644 arch/x86/kernel/cpu/topology.c create mode 100644 arch/x86/kernel/cpu/transmeta.c create mode 100644 arch/x86/kernel/cpu/tsx.c create mode 100644 arch/x86/kernel/cpu/umc.c create mode 100644 arch/x86/kernel/cpu/umwait.c create mode 100644 arch/x86/kernel/cpu/vmware.c create mode 100644 arch/x86/kernel/cpu/vortex.c create mode 100644 arch/x86/kernel/cpu/zhaoxin.c create mode 100644 arch/x86/kernel/cpuid.c create mode 100644 arch/x86/kernel/crash.c create mode 100644 arch/x86/kernel/crash_core_32.c create mode 100644 arch/x86/kernel/crash_core_64.c create mode 100644 arch/x86/kernel/crash_dump_32.c create mode 100644 arch/x86/kernel/crash_dump_64.c create mode 100644 arch/x86/kernel/devicetree.c create mode 100644 arch/x86/kernel/doublefault_32.c create mode 100644 arch/x86/kernel/dumpstack.c create mode 100644 arch/x86/kernel/dumpstack_32.c create mode 100644 arch/x86/kernel/dumpstack_64.c create mode 100644 arch/x86/kernel/e820.c create mode 100644 arch/x86/kernel/early-quirks.c create mode 100644 arch/x86/kernel/early_printk.c create mode 100644 arch/x86/kernel/ebda.c create mode 100644 arch/x86/kernel/eisa.c create mode 100644 arch/x86/kernel/espfix_64.c create mode 100644 arch/x86/kernel/fpu/Makefile create mode 100644 arch/x86/kernel/fpu/bugs.c create mode 100644 arch/x86/kernel/fpu/context.h create mode 100644 arch/x86/kernel/fpu/core.c create mode 100644 arch/x86/kernel/fpu/init.c create mode 100644 arch/x86/kernel/fpu/internal.h create mode 100644 arch/x86/kernel/fpu/legacy.h create mode 100644 arch/x86/kernel/fpu/regset.c create mode 100644 arch/x86/kernel/fpu/signal.c create mode 100644 arch/x86/kernel/fpu/xstate.c create mode 100644 arch/x86/kernel/fpu/xstate.h create mode 100644 arch/x86/kernel/ftrace.c create mode 100644 arch/x86/kernel/ftrace_32.S create mode 100644 arch/x86/kernel/ftrace_64.S create mode 100644 arch/x86/kernel/head32.c create mode 100644 arch/x86/kernel/head64.c create mode 100644 arch/x86/kernel/head_32.S create mode 100644 arch/x86/kernel/head_64.S create mode 100644 arch/x86/kernel/hpet.c create mode 100644 arch/x86/kernel/hw_breakpoint.c create mode 100644 arch/x86/kernel/i8237.c create mode 100644 arch/x86/kernel/i8253.c create mode 100644 arch/x86/kernel/i8259.c create mode 100644 arch/x86/kernel/idt.c create mode 100644 arch/x86/kernel/io_delay.c create mode 100644 arch/x86/kernel/ioport.c create mode 100644 arch/x86/kernel/irq.c create mode 100644 arch/x86/kernel/irq_32.c create mode 100644 arch/x86/kernel/irq_64.c create mode 100644 arch/x86/kernel/irq_work.c create mode 100644 arch/x86/kernel/irqflags.S create mode 100644 arch/x86/kernel/irqinit.c create mode 100644 arch/x86/kernel/itmt.c create mode 100644 arch/x86/kernel/jailhouse.c create mode 100644 arch/x86/kernel/jump_label.c create mode 100644 arch/x86/kernel/kdebugfs.c create mode 100644 arch/x86/kernel/kexec-bzimage64.c create mode 100644 arch/x86/kernel/kgdb.c create mode 100644 arch/x86/kernel/kprobes/Makefile create mode 100644 arch/x86/kernel/kprobes/common.h create mode 100644 arch/x86/kernel/kprobes/core.c create mode 100644 arch/x86/kernel/kprobes/ftrace.c create mode 100644 arch/x86/kernel/kprobes/opt.c create mode 100644 arch/x86/kernel/ksysfs.c create mode 100644 arch/x86/kernel/kvm.c create mode 100644 arch/x86/kernel/kvmclock.c create mode 100644 arch/x86/kernel/ldt.c create mode 100644 arch/x86/kernel/machine_kexec_32.c create mode 100644 arch/x86/kernel/machine_kexec_64.c create mode 100644 arch/x86/kernel/mmconf-fam10h_64.c create mode 100644 arch/x86/kernel/module.c create mode 100644 arch/x86/kernel/mpparse.c create mode 100644 arch/x86/kernel/msr.c create mode 100644 arch/x86/kernel/nmi.c create mode 100644 arch/x86/kernel/nmi_selftest.c create mode 100644 arch/x86/kernel/paravirt-spinlocks.c create mode 100644 arch/x86/kernel/paravirt.c create mode 100644 arch/x86/kernel/pci-dma.c create mode 100644 arch/x86/kernel/pcspeaker.c create mode 100644 arch/x86/kernel/perf_regs.c create mode 100644 arch/x86/kernel/platform-quirks.c create mode 100644 arch/x86/kernel/pmem.c create mode 100644 arch/x86/kernel/probe_roms.c create mode 100644 arch/x86/kernel/process.c create mode 100644 arch/x86/kernel/process.h create mode 100644 arch/x86/kernel/process_32.c create mode 100644 arch/x86/kernel/process_64.c create mode 100644 arch/x86/kernel/ptrace.c create mode 100644 arch/x86/kernel/pvclock.c create mode 100644 arch/x86/kernel/quirks.c create mode 100644 arch/x86/kernel/reboot.c create mode 100644 arch/x86/kernel/reboot_fixups_32.c create mode 100644 arch/x86/kernel/relocate_kernel_32.S create mode 100644 arch/x86/kernel/relocate_kernel_64.S create mode 100644 arch/x86/kernel/resource.c create mode 100644 arch/x86/kernel/rethook.c create mode 100644 arch/x86/kernel/rtc.c create mode 100644 arch/x86/kernel/setup.c create mode 100644 arch/x86/kernel/setup_percpu.c create mode 100644 arch/x86/kernel/sev-shared.c create mode 100644 arch/x86/kernel/sev.c create mode 100644 arch/x86/kernel/sev_verify_cbit.S create mode 100644 arch/x86/kernel/signal.c create mode 100644 arch/x86/kernel/signal_compat.c create mode 100644 arch/x86/kernel/smp.c create mode 100644 arch/x86/kernel/smpboot.c create mode 100644 arch/x86/kernel/stacktrace.c create mode 100644 arch/x86/kernel/static_call.c create mode 100644 arch/x86/kernel/step.c create mode 100644 arch/x86/kernel/sys_ia32.c create mode 100644 arch/x86/kernel/sys_x86_64.c create mode 100644 arch/x86/kernel/tboot.c create mode 100644 arch/x86/kernel/time.c create mode 100644 arch/x86/kernel/tls.c create mode 100644 arch/x86/kernel/tls.h create mode 100644 arch/x86/kernel/topology.c create mode 100644 arch/x86/kernel/trace.c create mode 100644 arch/x86/kernel/trace_clock.c create mode 100644 arch/x86/kernel/tracepoint.c create mode 100644 arch/x86/kernel/traps.c create mode 100644 arch/x86/kernel/tsc.c create mode 100644 arch/x86/kernel/tsc_msr.c create mode 100644 arch/x86/kernel/tsc_sync.c create mode 100644 arch/x86/kernel/umip.c create mode 100644 arch/x86/kernel/unwind_frame.c create mode 100644 arch/x86/kernel/unwind_guess.c create mode 100644 arch/x86/kernel/unwind_orc.c create mode 100644 arch/x86/kernel/uprobes.c create mode 100644 arch/x86/kernel/verify_cpu.S create mode 100644 arch/x86/kernel/vm86_32.c create mode 100644 arch/x86/kernel/vmlinux.lds.S create mode 100644 arch/x86/kernel/vsmp_64.c create mode 100644 arch/x86/kernel/x86_init.c create mode 100644 arch/x86/kvm/.gitignore create mode 100644 arch/x86/kvm/Kconfig create mode 100644 arch/x86/kvm/Makefile create mode 100644 arch/x86/kvm/cpuid.c create mode 100644 arch/x86/kvm/cpuid.h create mode 100644 arch/x86/kvm/debugfs.c create mode 100644 arch/x86/kvm/emulate.c create mode 100644 arch/x86/kvm/fpu.h create mode 100644 arch/x86/kvm/hyperv.c create mode 100644 arch/x86/kvm/hyperv.h create mode 100644 arch/x86/kvm/i8254.c create mode 100644 arch/x86/kvm/i8254.h create mode 100644 arch/x86/kvm/i8259.c create mode 100644 arch/x86/kvm/ioapic.c create mode 100644 arch/x86/kvm/ioapic.h create mode 100644 arch/x86/kvm/irq.c create mode 100644 arch/x86/kvm/irq.h create mode 100644 arch/x86/kvm/irq_comm.c create mode 100644 arch/x86/kvm/kvm-asm-offsets.c create mode 100644 arch/x86/kvm/kvm_cache_regs.h create mode 100644 arch/x86/kvm/kvm_emulate.h create mode 100644 arch/x86/kvm/kvm_onhyperv.c create mode 100644 arch/x86/kvm/kvm_onhyperv.h create mode 100644 arch/x86/kvm/lapic.c create mode 100644 arch/x86/kvm/lapic.h create mode 100644 arch/x86/kvm/mmu.h create mode 100644 arch/x86/kvm/mmu/mmu.c create mode 100644 arch/x86/kvm/mmu/mmu_internal.h create mode 100644 arch/x86/kvm/mmu/mmutrace.h create mode 100644 arch/x86/kvm/mmu/page_track.c create mode 100644 arch/x86/kvm/mmu/paging_tmpl.h create mode 100644 arch/x86/kvm/mmu/spte.c create mode 100644 arch/x86/kvm/mmu/spte.h create mode 100644 arch/x86/kvm/mmu/tdp_iter.c create mode 100644 arch/x86/kvm/mmu/tdp_iter.h create mode 100644 arch/x86/kvm/mmu/tdp_mmu.c create mode 100644 arch/x86/kvm/mmu/tdp_mmu.h create mode 100644 arch/x86/kvm/mtrr.c create mode 100644 arch/x86/kvm/pmu.c create mode 100644 arch/x86/kvm/pmu.h create mode 100644 arch/x86/kvm/reverse_cpuid.h create mode 100644 arch/x86/kvm/svm/avic.c create mode 100644 arch/x86/kvm/svm/hyperv.h create mode 100644 arch/x86/kvm/svm/nested.c create mode 100644 arch/x86/kvm/svm/pmu.c create mode 100644 arch/x86/kvm/svm/sev.c create mode 100644 arch/x86/kvm/svm/svm.c create mode 100644 arch/x86/kvm/svm/svm.h create mode 100644 arch/x86/kvm/svm/svm_onhyperv.c create mode 100644 arch/x86/kvm/svm/svm_onhyperv.h create mode 100644 arch/x86/kvm/svm/svm_ops.h create mode 100644 arch/x86/kvm/svm/vmenter.S create mode 100644 arch/x86/kvm/trace.h create mode 100644 arch/x86/kvm/tss.h create mode 100644 arch/x86/kvm/vmx/capabilities.h create mode 100644 arch/x86/kvm/vmx/evmcs.c create mode 100644 arch/x86/kvm/vmx/evmcs.h create mode 100644 arch/x86/kvm/vmx/nested.c create mode 100644 arch/x86/kvm/vmx/nested.h create mode 100644 arch/x86/kvm/vmx/pmu_intel.c create mode 100644 arch/x86/kvm/vmx/posted_intr.c create mode 100644 arch/x86/kvm/vmx/posted_intr.h create mode 100644 arch/x86/kvm/vmx/run_flags.h create mode 100644 arch/x86/kvm/vmx/sgx.c create mode 100644 arch/x86/kvm/vmx/sgx.h create mode 100644 arch/x86/kvm/vmx/vmcs.h create mode 100644 arch/x86/kvm/vmx/vmcs12.c create mode 100644 arch/x86/kvm/vmx/vmcs12.h create mode 100644 arch/x86/kvm/vmx/vmcs_shadow_fields.h create mode 100644 arch/x86/kvm/vmx/vmenter.S create mode 100644 arch/x86/kvm/vmx/vmx.c create mode 100644 arch/x86/kvm/vmx/vmx.h create mode 100644 arch/x86/kvm/vmx/vmx_ops.h create mode 100644 arch/x86/kvm/x86.c create mode 100644 arch/x86/kvm/x86.h create mode 100644 arch/x86/kvm/xen.c create mode 100644 arch/x86/kvm/xen.h create mode 100644 arch/x86/lib/.gitignore create mode 100644 arch/x86/lib/Makefile create mode 100644 arch/x86/lib/atomic64_32.c create mode 100644 arch/x86/lib/atomic64_386_32.S create mode 100644 arch/x86/lib/atomic64_cx8_32.S create mode 100644 arch/x86/lib/cache-smp.c create mode 100644 arch/x86/lib/checksum_32.S create mode 100644 arch/x86/lib/clear_page_64.S create mode 100644 arch/x86/lib/cmdline.c create mode 100644 arch/x86/lib/cmpxchg16b_emu.S create mode 100644 arch/x86/lib/cmpxchg8b_emu.S create mode 100644 arch/x86/lib/copy_mc.c create mode 100644 arch/x86/lib/copy_mc_64.S create mode 100644 arch/x86/lib/copy_page_64.S create mode 100644 arch/x86/lib/copy_user_64.S create mode 100644 arch/x86/lib/cpu.c create mode 100644 arch/x86/lib/csum-copy_64.S create mode 100644 arch/x86/lib/csum-partial_64.c create mode 100644 arch/x86/lib/csum-wrappers_64.c create mode 100644 arch/x86/lib/delay.c create mode 100644 arch/x86/lib/error-inject.c create mode 100644 arch/x86/lib/getuser.S create mode 100644 arch/x86/lib/hweight.S create mode 100644 arch/x86/lib/inat.c create mode 100644 arch/x86/lib/insn-eval.c create mode 100644 arch/x86/lib/insn.c create mode 100644 arch/x86/lib/iomap_copy_64.S create mode 100644 arch/x86/lib/iomem.c create mode 100644 arch/x86/lib/kaslr.c create mode 100644 arch/x86/lib/memcpy_32.c create mode 100644 arch/x86/lib/memcpy_64.S create mode 100644 arch/x86/lib/memmove_64.S create mode 100644 arch/x86/lib/memset_64.S create mode 100644 arch/x86/lib/misc.c create mode 100644 arch/x86/lib/msr-reg-export.c create mode 100644 arch/x86/lib/msr-reg.S create mode 100644 arch/x86/lib/msr-smp.c create mode 100644 arch/x86/lib/msr.c create mode 100644 arch/x86/lib/pc-conf-reg.c create mode 100644 arch/x86/lib/putuser.S create mode 100644 arch/x86/lib/retpoline.S create mode 100644 arch/x86/lib/string_32.c create mode 100644 arch/x86/lib/strstr_32.c create mode 100644 arch/x86/lib/usercopy.c create mode 100644 arch/x86/lib/usercopy_32.c create mode 100644 arch/x86/lib/usercopy_64.c create mode 100644 arch/x86/lib/x86-opcode-map.txt create mode 100644 arch/x86/math-emu/Makefile create mode 100644 arch/x86/math-emu/README create mode 100644 arch/x86/math-emu/control_w.h create mode 100644 arch/x86/math-emu/div_Xsig.S create mode 100644 arch/x86/math-emu/div_small.S create mode 100644 arch/x86/math-emu/errors.c create mode 100644 arch/x86/math-emu/exception.h create mode 100644 arch/x86/math-emu/fpu_arith.c create mode 100644 arch/x86/math-emu/fpu_asm.h create mode 100644 arch/x86/math-emu/fpu_aux.c create mode 100644 arch/x86/math-emu/fpu_emu.h create mode 100644 arch/x86/math-emu/fpu_entry.c create mode 100644 arch/x86/math-emu/fpu_etc.c create mode 100644 arch/x86/math-emu/fpu_proto.h create mode 100644 arch/x86/math-emu/fpu_system.h create mode 100644 arch/x86/math-emu/fpu_tags.c create mode 100644 arch/x86/math-emu/fpu_trig.c create mode 100644 arch/x86/math-emu/get_address.c create mode 100644 arch/x86/math-emu/load_store.c create mode 100644 arch/x86/math-emu/mul_Xsig.S create mode 100644 arch/x86/math-emu/poly.h create mode 100644 arch/x86/math-emu/poly_2xm1.c create mode 100644 arch/x86/math-emu/poly_atan.c create mode 100644 arch/x86/math-emu/poly_l2.c create mode 100644 arch/x86/math-emu/poly_sin.c create mode 100644 arch/x86/math-emu/poly_tan.c create mode 100644 arch/x86/math-emu/polynom_Xsig.S create mode 100644 arch/x86/math-emu/reg_add_sub.c create mode 100644 arch/x86/math-emu/reg_compare.c create mode 100644 arch/x86/math-emu/reg_constant.c create mode 100644 arch/x86/math-emu/reg_constant.h create mode 100644 arch/x86/math-emu/reg_convert.c create mode 100644 arch/x86/math-emu/reg_divide.c create mode 100644 arch/x86/math-emu/reg_ld_str.c create mode 100644 arch/x86/math-emu/reg_mul.c create mode 100644 arch/x86/math-emu/reg_norm.S create mode 100644 arch/x86/math-emu/reg_round.S create mode 100644 arch/x86/math-emu/reg_u_add.S create mode 100644 arch/x86/math-emu/reg_u_div.S create mode 100644 arch/x86/math-emu/reg_u_mul.S create mode 100644 arch/x86/math-emu/reg_u_sub.S create mode 100644 arch/x86/math-emu/round_Xsig.S create mode 100644 arch/x86/math-emu/shr_Xsig.S create mode 100644 arch/x86/math-emu/status_w.h create mode 100644 arch/x86/math-emu/version.h create mode 100644 arch/x86/math-emu/wm_shrx.S create mode 100644 arch/x86/math-emu/wm_sqrt.S create mode 100644 arch/x86/mm/Makefile create mode 100644 arch/x86/mm/amdtopology.c create mode 100644 arch/x86/mm/cpu_entry_area.c create mode 100644 arch/x86/mm/debug_pagetables.c create mode 100644 arch/x86/mm/dump_pagetables.c create mode 100644 arch/x86/mm/extable.c create mode 100644 arch/x86/mm/fault.c create mode 100644 arch/x86/mm/highmem_32.c create mode 100644 arch/x86/mm/hugetlbpage.c create mode 100644 arch/x86/mm/ident_map.c create mode 100644 arch/x86/mm/init.c create mode 100644 arch/x86/mm/init_32.c create mode 100644 arch/x86/mm/init_64.c create mode 100644 arch/x86/mm/iomap_32.c create mode 100644 arch/x86/mm/ioremap.c create mode 100644 arch/x86/mm/kasan_init_64.c create mode 100644 arch/x86/mm/kaslr.c create mode 100644 arch/x86/mm/kmmio.c create mode 100644 arch/x86/mm/kmsan_shadow.c create mode 100644 arch/x86/mm/maccess.c create mode 100644 arch/x86/mm/mem_encrypt.c create mode 100644 arch/x86/mm/mem_encrypt_amd.c create mode 100644 arch/x86/mm/mem_encrypt_boot.S create mode 100644 arch/x86/mm/mem_encrypt_identity.c create mode 100644 arch/x86/mm/mm_internal.h create mode 100644 arch/x86/mm/mmap.c create mode 100644 arch/x86/mm/mmio-mod.c create mode 100644 arch/x86/mm/numa.c create mode 100644 arch/x86/mm/numa_32.c create mode 100644 arch/x86/mm/numa_64.c create mode 100644 arch/x86/mm/numa_emulation.c create mode 100644 arch/x86/mm/numa_internal.h create mode 100644 arch/x86/mm/pat/Makefile create mode 100644 arch/x86/mm/pat/cpa-test.c create mode 100644 arch/x86/mm/pat/memtype.c create mode 100644 arch/x86/mm/pat/memtype.h create mode 100644 arch/x86/mm/pat/memtype_interval.c create mode 100644 arch/x86/mm/pat/set_memory.c create mode 100644 arch/x86/mm/pf_in.c create mode 100644 arch/x86/mm/pf_in.h create mode 100644 arch/x86/mm/pgprot.c create mode 100644 arch/x86/mm/pgtable.c create mode 100644 arch/x86/mm/pgtable_32.c create mode 100644 arch/x86/mm/physaddr.c create mode 100644 arch/x86/mm/physaddr.h create mode 100644 arch/x86/mm/pkeys.c create mode 100644 arch/x86/mm/pti.c create mode 100644 arch/x86/mm/srat.c create mode 100644 arch/x86/mm/testmmiotrace.c create mode 100644 arch/x86/mm/tlb.c create mode 100644 arch/x86/net/Makefile create mode 100644 arch/x86/net/bpf_jit_comp.c create mode 100644 arch/x86/net/bpf_jit_comp32.c create mode 100644 arch/x86/pci/Makefile create mode 100644 arch/x86/pci/acpi.c create mode 100644 arch/x86/pci/amd_bus.c create mode 100644 arch/x86/pci/broadcom_bus.c create mode 100644 arch/x86/pci/bus_numa.c create mode 100644 arch/x86/pci/bus_numa.h create mode 100644 arch/x86/pci/ce4100.c create mode 100644 arch/x86/pci/common.c create mode 100644 arch/x86/pci/direct.c create mode 100644 arch/x86/pci/early.c create mode 100644 arch/x86/pci/fixup.c create mode 100644 arch/x86/pci/i386.c create mode 100644 arch/x86/pci/init.c create mode 100644 arch/x86/pci/intel_mid_pci.c create mode 100644 arch/x86/pci/irq.c create mode 100644 arch/x86/pci/legacy.c create mode 100644 arch/x86/pci/mmconfig-shared.c create mode 100644 arch/x86/pci/mmconfig_32.c create mode 100644 arch/x86/pci/mmconfig_64.c create mode 100644 arch/x86/pci/numachip.c create mode 100644 arch/x86/pci/olpc.c create mode 100644 arch/x86/pci/pcbios.c create mode 100644 arch/x86/pci/sta2x11-fixup.c create mode 100644 arch/x86/pci/xen.c create mode 100644 arch/x86/platform/Makefile create mode 100644 arch/x86/platform/atom/Makefile create mode 100644 arch/x86/platform/atom/punit_atom_debug.c create mode 100644 arch/x86/platform/ce4100/Makefile create mode 100644 arch/x86/platform/ce4100/ce4100.c create mode 100644 arch/x86/platform/ce4100/falconfalls.dts create mode 100644 arch/x86/platform/efi/Makefile create mode 100644 arch/x86/platform/efi/efi.c create mode 100644 arch/x86/platform/efi/efi_32.c create mode 100644 arch/x86/platform/efi/efi_64.c create mode 100644 arch/x86/platform/efi/efi_stub_32.S create mode 100644 arch/x86/platform/efi/efi_stub_64.S create mode 100644 arch/x86/platform/efi/efi_thunk_64.S create mode 100644 arch/x86/platform/efi/quirks.c create mode 100644 arch/x86/platform/geode/Makefile create mode 100644 arch/x86/platform/geode/alix.c create mode 100644 arch/x86/platform/geode/geos.c create mode 100644 arch/x86/platform/geode/net5501.c create mode 100644 arch/x86/platform/intel-mid/Makefile create mode 100644 arch/x86/platform/intel-mid/intel-mid.c create mode 100644 arch/x86/platform/intel-mid/pwr.c create mode 100644 arch/x86/platform/intel-quark/Makefile create mode 100644 arch/x86/platform/intel-quark/imr.c create mode 100644 arch/x86/platform/intel-quark/imr_selftest.c create mode 100644 arch/x86/platform/intel/Makefile create mode 100644 arch/x86/platform/intel/iosf_mbi.c create mode 100644 arch/x86/platform/iris/Makefile create mode 100644 arch/x86/platform/iris/iris.c create mode 100644 arch/x86/platform/olpc/Makefile create mode 100644 arch/x86/platform/olpc/olpc-xo1-pm.c create mode 100644 arch/x86/platform/olpc/olpc-xo1-rtc.c create mode 100644 arch/x86/platform/olpc/olpc-xo1-sci.c create mode 100644 arch/x86/platform/olpc/olpc-xo15-sci.c create mode 100644 arch/x86/platform/olpc/olpc.c create mode 100644 arch/x86/platform/olpc/olpc_dt.c create mode 100644 arch/x86/platform/olpc/olpc_ofw.c create mode 100644 arch/x86/platform/olpc/xo1-wakeup.S create mode 100644 arch/x86/platform/pvh/Makefile create mode 100644 arch/x86/platform/pvh/enlighten.c create mode 100644 arch/x86/platform/pvh/head.S create mode 100644 arch/x86/platform/scx200/Makefile create mode 100644 arch/x86/platform/scx200/scx200_32.c create mode 100644 arch/x86/platform/ts5500/Makefile create mode 100644 arch/x86/platform/ts5500/ts5500.c create mode 100644 arch/x86/platform/uv/Makefile create mode 100644 arch/x86/platform/uv/bios_uv.c create mode 100644 arch/x86/platform/uv/uv_irq.c create mode 100644 arch/x86/platform/uv/uv_nmi.c create mode 100644 arch/x86/platform/uv/uv_time.c create mode 100644 arch/x86/power/Makefile create mode 100644 arch/x86/power/cpu.c create mode 100644 arch/x86/power/hibernate.c create mode 100644 arch/x86/power/hibernate_32.c create mode 100644 arch/x86/power/hibernate_64.c create mode 100644 arch/x86/power/hibernate_asm_32.S create mode 100644 arch/x86/power/hibernate_asm_64.S create mode 100644 arch/x86/purgatory/.gitignore create mode 100644 arch/x86/purgatory/Makefile create mode 100644 arch/x86/purgatory/entry64.S create mode 100644 arch/x86/purgatory/kexec-purgatory.S create mode 100644 arch/x86/purgatory/purgatory.c create mode 100644 arch/x86/purgatory/setup-x86_64.S create mode 100644 arch/x86/purgatory/stack.S create mode 100644 arch/x86/ras/Kconfig create mode 100644 arch/x86/realmode/Makefile create mode 100644 arch/x86/realmode/init.c create mode 100644 arch/x86/realmode/rm/.gitignore create mode 100644 arch/x86/realmode/rm/Makefile create mode 100644 arch/x86/realmode/rm/bioscall.S create mode 100644 arch/x86/realmode/rm/copy.S create mode 100644 arch/x86/realmode/rm/header.S create mode 100644 arch/x86/realmode/rm/realmode.h create mode 100644 arch/x86/realmode/rm/realmode.lds.S create mode 100644 arch/x86/realmode/rm/reboot.S create mode 100644 arch/x86/realmode/rm/regs.c create mode 100644 arch/x86/realmode/rm/stack.S create mode 100644 arch/x86/realmode/rm/trampoline_32.S create mode 100644 arch/x86/realmode/rm/trampoline_64.S create mode 100644 arch/x86/realmode/rm/trampoline_common.S create mode 100644 arch/x86/realmode/rm/video-bios.c create mode 100644 arch/x86/realmode/rm/video-mode.c create mode 100644 arch/x86/realmode/rm/video-vesa.c create mode 100644 arch/x86/realmode/rm/video-vga.c create mode 100644 arch/x86/realmode/rm/wakemain.c create mode 100644 arch/x86/realmode/rm/wakeup.h create mode 100644 arch/x86/realmode/rm/wakeup_asm.S create mode 100644 arch/x86/realmode/rmpiggy.S create mode 100644 arch/x86/tools/.gitignore create mode 100644 arch/x86/tools/Makefile create mode 100644 arch/x86/tools/chkobjdump.awk create mode 100644 arch/x86/tools/gen-insn-attr-x86.awk create mode 100644 arch/x86/tools/insn_decoder_test.c create mode 100644 arch/x86/tools/insn_sanity.c create mode 100644 arch/x86/tools/objdump_reformat.awk create mode 100644 arch/x86/tools/relocs.c create mode 100644 arch/x86/tools/relocs.h create mode 100644 arch/x86/tools/relocs_32.c create mode 100644 arch/x86/tools/relocs_64.c create mode 100644 arch/x86/tools/relocs_common.c create mode 100644 arch/x86/um/Kconfig create mode 100644 arch/x86/um/Makefile create mode 100644 arch/x86/um/asm/apic.h create mode 100644 arch/x86/um/asm/arch_hweight.h create mode 100644 arch/x86/um/asm/archparam.h create mode 100644 arch/x86/um/asm/barrier.h create mode 100644 arch/x86/um/asm/checksum.h create mode 100644 arch/x86/um/asm/checksum_32.h create mode 100644 arch/x86/um/asm/checksum_64.h create mode 100644 arch/x86/um/asm/desc.h create mode 100644 arch/x86/um/asm/elf.h create mode 100644 arch/x86/um/asm/irq_vectors.h create mode 100644 arch/x86/um/asm/mm_context.h create mode 100644 arch/x86/um/asm/module.h create mode 100644 arch/x86/um/asm/processor.h create mode 100644 arch/x86/um/asm/processor_32.h create mode 100644 arch/x86/um/asm/processor_64.h create mode 100644 arch/x86/um/asm/ptrace.h create mode 100644 arch/x86/um/asm/required-features.h create mode 100644 arch/x86/um/asm/segment.h create mode 100644 arch/x86/um/asm/syscall.h create mode 100644 arch/x86/um/asm/vm-flags.h create mode 100644 arch/x86/um/bugs_32.c create mode 100644 arch/x86/um/bugs_64.c create mode 100644 arch/x86/um/checksum_32.S create mode 100644 arch/x86/um/delay.c create mode 100644 arch/x86/um/elfcore.c create mode 100644 arch/x86/um/fault.c create mode 100644 arch/x86/um/ldt.c create mode 100644 arch/x86/um/mem_32.c create mode 100644 arch/x86/um/mem_64.c create mode 100644 arch/x86/um/os-Linux/Makefile create mode 100644 arch/x86/um/os-Linux/mcontext.c create mode 100644 arch/x86/um/os-Linux/prctl.c create mode 100644 arch/x86/um/os-Linux/registers.c create mode 100644 arch/x86/um/os-Linux/task_size.c create mode 100644 arch/x86/um/os-Linux/tls.c create mode 100644 arch/x86/um/ptrace_32.c create mode 100644 arch/x86/um/ptrace_64.c create mode 100644 arch/x86/um/ptrace_user.c create mode 100644 arch/x86/um/setjmp_32.S create mode 100644 arch/x86/um/setjmp_64.S create mode 100644 arch/x86/um/shared/sysdep/archsetjmp.h create mode 100644 arch/x86/um/shared/sysdep/archsetjmp_32.h create mode 100644 arch/x86/um/shared/sysdep/archsetjmp_64.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo_32.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo_64.h create mode 100644 arch/x86/um/shared/sysdep/kernel-offsets.h create mode 100644 arch/x86/um/shared/sysdep/mcontext.h create mode 100644 arch/x86/um/shared/sysdep/ptrace.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_32.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_64.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_user.h create mode 100644 arch/x86/um/shared/sysdep/stub.h create mode 100644 arch/x86/um/shared/sysdep/stub_32.h create mode 100644 arch/x86/um/shared/sysdep/stub_64.h create mode 100644 arch/x86/um/shared/sysdep/syscalls.h create mode 100644 arch/x86/um/shared/sysdep/syscalls_32.h create mode 100644 arch/x86/um/shared/sysdep/syscalls_64.h create mode 100644 arch/x86/um/shared/sysdep/tls.h create mode 100644 arch/x86/um/signal.c create mode 100644 arch/x86/um/stub_32.S create mode 100644 arch/x86/um/stub_64.S create mode 100644 arch/x86/um/stub_segv.c create mode 100644 arch/x86/um/sys_call_table_32.c create mode 100644 arch/x86/um/sys_call_table_64.c create mode 100644 arch/x86/um/syscalls_32.c create mode 100644 arch/x86/um/syscalls_64.c create mode 100644 arch/x86/um/sysrq_32.c create mode 100644 arch/x86/um/sysrq_64.c create mode 100644 arch/x86/um/tls_32.c create mode 100644 arch/x86/um/tls_64.c create mode 100644 arch/x86/um/user-offsets.c create mode 100644 arch/x86/um/vdso/.gitignore create mode 100644 arch/x86/um/vdso/Makefile create mode 100644 arch/x86/um/vdso/checkundef.sh create mode 100644 arch/x86/um/vdso/um_vdso.c create mode 100644 arch/x86/um/vdso/vdso-layout.lds.S create mode 100644 arch/x86/um/vdso/vdso-note.S create mode 100644 arch/x86/um/vdso/vdso.S create mode 100644 arch/x86/um/vdso/vdso.lds.S create mode 100644 arch/x86/um/vdso/vma.c create mode 100644 arch/x86/video/Makefile create mode 100644 arch/x86/video/fbdev.c create mode 100644 arch/x86/virt/vmx/tdx/tdxcall.S create mode 100644 arch/x86/xen/Kconfig create mode 100644 arch/x86/xen/Makefile create mode 100644 arch/x86/xen/apic.c create mode 100644 arch/x86/xen/debugfs.c create mode 100644 arch/x86/xen/debugfs.h create mode 100644 arch/x86/xen/efi.c create mode 100644 arch/x86/xen/enlighten.c create mode 100644 arch/x86/xen/enlighten_hvm.c create mode 100644 arch/x86/xen/enlighten_pv.c create mode 100644 arch/x86/xen/enlighten_pvh.c create mode 100644 arch/x86/xen/grant-table.c create mode 100644 arch/x86/xen/irq.c create mode 100644 arch/x86/xen/mmu.c create mode 100644 arch/x86/xen/mmu.h create mode 100644 arch/x86/xen/mmu_hvm.c create mode 100644 arch/x86/xen/mmu_pv.c create mode 100644 arch/x86/xen/multicalls.c create mode 100644 arch/x86/xen/multicalls.h create mode 100644 arch/x86/xen/p2m.c create mode 100644 arch/x86/xen/platform-pci-unplug.c create mode 100644 arch/x86/xen/pmu.c create mode 100644 arch/x86/xen/pmu.h create mode 100644 arch/x86/xen/setup.c create mode 100644 arch/x86/xen/smp.c create mode 100644 arch/x86/xen/smp.h create mode 100644 arch/x86/xen/smp_hvm.c create mode 100644 arch/x86/xen/smp_pv.c create mode 100644 arch/x86/xen/spinlock.c create mode 100644 arch/x86/xen/suspend.c create mode 100644 arch/x86/xen/suspend_hvm.c create mode 100644 arch/x86/xen/suspend_pv.c create mode 100644 arch/x86/xen/time.c create mode 100644 arch/x86/xen/trace.c create mode 100644 arch/x86/xen/vga.c create mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-head.S create mode 100644 arch/x86/xen/xen-ops.h (limited to 'arch/x86') diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore new file mode 100644 index 000000000..f2e1d6c34 --- /dev/null +++ b/arch/x86/.gitignore @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +boot/compressed/vmlinux +tools/test_get_len +tools/insn_sanity +tools/insn_decoder_test +purgatory/purgatory.ro diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild new file mode 100644 index 000000000..5a83da703 --- /dev/null +++ b/arch/x86/Kbuild @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/ + +obj-y += entry/ + +obj-$(CONFIG_PERF_EVENTS) += events/ + +obj-$(CONFIG_KVM) += kvm/ + +# Xen paravirtualization support +obj-$(CONFIG_XEN) += xen/ + +obj-$(CONFIG_PVH) += platform/pvh/ + +# Hyper-V paravirtualization support +obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/ + +obj-y += realmode/ +obj-y += kernel/ +obj-y += mm/ + +obj-y += crypto/ + +obj-$(CONFIG_IA32_EMULATION) += ia32/ + +obj-y += platform/ +obj-y += net/ + +obj-$(CONFIG_KEXEC_FILE) += purgatory/ + +# for cleaning +subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig new file mode 100644 index 000000000..4c9bfc4be --- /dev/null +++ b/arch/x86/Kconfig @@ -0,0 +1,2976 @@ +# SPDX-License-Identifier: GPL-2.0 +# Select 32 or 64 bit +config 64BIT + bool "64-bit kernel" if "$(ARCH)" = "x86" + default "$(ARCH)" != "i386" + help + Say yes to build a 64-bit kernel - formerly known as x86_64 + Say no to build a 32-bit kernel - formerly known as i386 + +config X86_32 + def_bool y + depends on !64BIT + # Options that are inherently 32-bit kernel only: + select ARCH_WANT_IPC_PARSE_VERSION + select CLKSRC_I8253 + select CLONE_BACKWARDS + select GENERIC_VDSO_32 + select HAVE_DEBUG_STACKOVERFLOW + select KMAP_LOCAL + select MODULES_USE_ELF_REL + select OLD_SIGACTION + select ARCH_SPLIT_ARG64 + +config X86_64 + def_bool y + depends on 64BIT + # Options that are inherently 64-bit kernel only: + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_USE_CMPXCHG_LOCKREF + select HAVE_ARCH_SOFT_DIRTY + select MODULES_USE_ELF_RELA + select NEED_DMA_MAP_STATE + select SWIOTLB + select ARCH_HAS_ELFCORE_COMPAT + select ZONE_DMA32 + +config FORCE_DYNAMIC_FTRACE + def_bool y + depends on X86_32 + depends on FUNCTION_TRACER + select DYNAMIC_FTRACE + help + We keep the static function tracing (!DYNAMIC_FTRACE) around + in order to test the non static function tracing in the + generic code, as other architectures still use it. But we + only need to keep it around for x86_64. No need to keep it + for x86_32. For x86_32, force DYNAMIC_FTRACE. +# +# Arch settings +# +# ( Note that options that are marked 'if X86_64' could in principle be +# ported to 32-bit as well. ) +# +config X86 + def_bool y + # + # Note: keep this list sorted alphabetically + # + select ACPI_LEGACY_TABLES_LOOKUP if ACPI + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ARCH_32BIT_OFF_T if X86_32 + select ARCH_CLOCKSOURCE_INIT + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 + select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG + select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE) + select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CURRENT_STACK_POINTER + select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_EARLY_DEBUG if KGDB + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_MEM_ENCRYPT + select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_DEVMAP if X86_64 + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2 + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_COPY_MC if X86_64 + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE + select ARCH_HAS_SYSCALL_WRAPPER + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_DEBUG_WX + select ARCH_HAS_ZONE_DMA_SET if EXPERT + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_STACKWALK + select ARCH_SUPPORTS_ACPI + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64 + select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 + select ARCH_SUPPORTS_CFI_CLANG if X86_64 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_SYM_ANNOTATIONS + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 + select ARCH_WANTS_DYNAMIC_TASK_STRUCT + select ARCH_WANTS_NO_INSTR + select ARCH_WANT_GENERAL_HUGETLB + select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64 + select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_THP_SWAP if X86_64 + select ARCH_HAS_PARANOID_L1D_FLUSH + select BUILDTIME_TABLE_SORT + select CLKEVT_I8253 + select CLOCKSOURCE_VALIDATE_LAST_CYCLE + select CLOCKSOURCE_WATCHDOG + # Word-size accesses may read uninitialized data past the trailing \0 + # in strings and cause false KMSAN reports. + select DCACHE_WORD_ACCESS if !KMSAN + select DYNAMIC_SIGFRAME + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) + select GENERIC_CLOCKEVENTS_MIN_ADJUST + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES + select GENERIC_EARLY_IOREMAP + select GENERIC_ENTRY + select GENERIC_IOMAP + select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP + select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC + select GENERIC_IRQ_MIGRATION if SMP + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_RESERVATION_MODE + select GENERIC_IRQ_SHOW + select GENERIC_PENDING_IRQ if SMP + select GENERIC_PTDUMP + select GENERIC_SMP_IDLE_THREAD + select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS + select GUP_GET_PTE_LOW_HIGH if X86_PAE + select HARDIRQS_SW_RESEND + select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 + select HAVE_ACPI_APEI if ACPI + select HAVE_ACPI_APEI_NMI if ACPI + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE + select HAVE_ARCH_HUGE_VMALLOC if X86_64 + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KASAN_VMALLOC if X86_64 + select HAVE_ARCH_KFENCE + select HAVE_ARCH_KMSAN if X86_64 + select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT + select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST + select HAVE_ARCH_STACKLEAK + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 + select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD + select HAVE_ARCH_USERFAULTFD_MINOR if X86_64 && USERFAULTFD + select HAVE_ARCH_VMAP_STACK if X86_64 + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + select HAVE_ARCH_WITHIN_STACK_FRAMES + select HAVE_ASM_MODVERSIONS + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL + select HAVE_CONTEXT_TRACKING_USER if X86_64 + select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER + select HAVE_C_RECORDMCOUNT + select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL + select HAVE_BUILDTIME_MCOUNT_SORT + select HAVE_DEBUG_KMEMLEAK + select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64 + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT if X86_64 + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64 + select HAVE_EBPF_JIT + select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_EISA + select HAVE_EXIT_THREAD + select HAVE_FAST_GUP + select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER if X86_32 || (X86_64 && DYNAMIC_FTRACE) + select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS + select HAVE_HW_BREAKPOINT + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_JUMP_LABEL_HACK if HAVE_OBJTOOL + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KERNEL_ZSTD + select HAVE_KPROBES + select HAVE_KPROBES_ON_FTRACE + select HAVE_FUNCTION_ERROR_INJECTION + select HAVE_KRETPROBES + select HAVE_RETHOOK + select HAVE_KVM + select HAVE_LIVEPATCH if X86_64 + select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_MOVE_PMD + select HAVE_MOVE_PUD + select HAVE_NOINSTR_HACK if HAVE_OBJTOOL + select HAVE_NMI + select HAVE_NOINSTR_VALIDATION if HAVE_OBJTOOL + select HAVE_OBJTOOL if X86_64 + select HAVE_OPTPROBES + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI + select HAVE_PCI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION + select HAVE_FUNCTION_ARG_ACCESS_API + select HAVE_SETUP_PER_CPU_AREA + select HAVE_SOFTIRQ_ON_OWN_STACK + select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR + select HAVE_STACK_VALIDATION if HAVE_OBJTOOL + select HAVE_STATIC_CALL + select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL + select HAVE_PREEMPT_DYNAMIC_CALL + select HAVE_RSEQ + select HAVE_RUST if X86_64 + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL + select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO + select HOTPLUG_SMT if SMP + select IRQ_FORCED_THREADING + select LOCK_MM_AND_FIND_VMA + select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PER_CPU_PAGE_FIRST_CHUNK + select NEED_SG_DMA_LENGTH + select PCI_DOMAINS if PCI + select PCI_LOCKLESS_CONFIG if PCI + select PERF_EVENTS + select RTC_LIB + select RTC_MC146818_LIB + select SPARSE_IRQ + select SRCU + select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT + select USER_STACKTRACE_SUPPORT + select HAVE_ARCH_KCSAN if X86_64 + select X86_FEATURE_NAMES if PROC_FS + select PROC_PID_ARCH_STATUS if PROC_FS + select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX + imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE + +config INSTRUCTION_DECODER + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES + +config OUTPUT_FORMAT + string + default "elf32-i386" if X86_32 + default "elf64-x86-64" if X86_64 + +config LOCKDEP_SUPPORT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + +config MMU + def_bool y + +config ARCH_MMAP_RND_BITS_MIN + default 28 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 32 if 64BIT + default 16 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + +config SBUS + bool + +config GENERIC_ISA_DMA + def_bool y + depends on ISA_DMA_API + +config GENERIC_CSUM + bool + default y if KMSAN || KASAN + +config GENERIC_BUG + def_bool y + depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if X86_64 + +config GENERIC_BUG_RELATIVE_POINTERS + bool + +config ARCH_MAY_HAVE_PC_FDC + def_bool y + depends on ISA_DMA_API + +config GENERIC_CALIBRATE_DELAY + def_bool y + +config ARCH_HAS_CPU_RELAX + def_bool y + +config ARCH_HIBERNATION_POSSIBLE + def_bool y + +config ARCH_NR_GPIO + int + default 1024 if X86_64 + default 512 + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +config AUDIT_ARCH + def_bool y if X86_64 + +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + +config HAVE_INTEL_TXT + def_bool y + depends on INTEL_IOMMU && ACPI + +config X86_32_SMP + def_bool y + depends on X86_32 && SMP + +config X86_64_SMP + def_bool y + depends on X86_64 && SMP + +config ARCH_SUPPORTS_UPROBES + def_bool y + +config FIX_EARLYCON_MEM + def_bool y + +config DYNAMIC_PHYSICAL_MASK + bool + +config PGTABLE_LEVELS + int + default 5 if X86_5LEVEL + default 4 if X86_64 + default 3 if X86_PAE + default 2 + +config CC_HAS_SANE_STACKPROTECTOR + bool + default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT + default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) + help + We have to make sure stack protector is unconditionally disabled if + the compiler produces broken code or if it does not let us control + the segment on 32-bit kernels. + +menu "Processor type and features" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on uni- and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + uniprocessor machines. On a uniprocessor machine, the kernel + will run faster if you say N here. + + Note that if you say Y here and choose architecture "586" or + "Pentium" under "Processor family", the kernel will not work on 486 + architectures. Similarly, multiprocessor kernels for the "PPro" + architecture may not work on all Pentium based boards. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. The "Advanced Power + Management" code will be disabled if you say Y here. + + See also , + and the SMP-HOWTO available at + . + + If you don't know what to do here, say N. + +config X86_FEATURE_NAMES + bool "Processor feature human-readable names" if EMBEDDED + default y + help + This option compiles in a table of x86 feature bits and corresponding + names. This is required to support /proc/cpuinfo and a few kernel + messages. You can disable this to save space, at the expense of + making those few kernel messages show numeric feature bits instead. + + If in doubt, say Y. + +config X86_X2APIC + bool "Support x2apic" + depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) + help + This enables x2apic support on CPUs that have this feature. + + This allows 32-bit apic IDs (so it can support very large systems), + and accesses the local apic via MSRs not via mmio. + + Some Intel systems circa 2022 and later are locked into x2APIC mode + and can not fall back to the legacy APIC modes if SGX or TDX are + enabled in the BIOS. They will boot with very reduced functionality + without enabling this option. + + If you don't know what to do here, say N. + +config X86_MPPARSE + bool "Enable MPS table" if ACPI + default y + depends on X86_LOCAL_APIC + help + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it + +config GOLDFISH + def_bool y + depends on X86_GOLDFISH + +config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) + select KERNFS + select PROC_CPU_RESCTRL if PROC_FS + help + Enable x86 CPU resource control support. + + Provide support for the allocation and monitoring of system resources + usage by the CPU. + + Intel calls this Intel Resource Director Technology + (Intel(R) RDT). More information about RDT can be found in the + Intel x86 Architecture Software Developer Manual. + + AMD calls this AMD Platform Quality of Service (AMD QoS). + More information about AMD QoS can be found in the AMD64 Technology + Platform Quality of Service Extensions manual. + + Say N if unsure. + +if X86_32 +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on SMP + help + This option is needed for the systems that have more than 8 CPUs. + +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y + help + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + + If you enable this option then you'll be able to select support + for the following (non-PC) 32 bit x86 platforms: + Goldfish (Android emulator) + AMD Elan + RDC R-321x SoC + SGI 320/540 (Visual Workstation) + STA2X11-based (e.g. Northville) + Moorestown MID devices + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. +endif # X86_32 + +if X86_64 +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y + help + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + + If you enable this option then you'll be able to select support + for the following (non-PC) 64 bit x86 platforms: + Numascale NumaChip + ScaleMP vSMP + SGI Ultraviolet + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. +endif # X86_64 +# This is an alphabetically sorted list of 64 bit extended platforms +# Please maintain the alphabetic order if and when there are additions +config X86_NUMACHIP + bool "Numascale NumaChip" + depends on X86_64 + depends on X86_EXTENDED_PLATFORM + depends on NUMA + depends on SMP + depends on X86_X2APIC + depends on PCI_MMCONFIG + help + Adds support for Numascale NumaChip large-SMP systems. Needed to + enable more than ~168 cores. + If you don't have one of these, you should say N here. + +config X86_VSMP + bool "ScaleMP vSMP" + select HYPERVISOR_GUEST + select PARAVIRT + depends on X86_64 && PCI + depends on X86_EXTENDED_PLATFORM + depends on SMP + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + depends on X86_EXTENDED_PLATFORM + depends on NUMA + depends on EFI + depends on KEXEC_CORE + depends on X86_X2APIC + depends on PCI + help + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + +# Following is an alphabetically sorted list of 32 bit extended platforms +# Please maintain the alphabetic order if and when there are additions + +config X86_GOLDFISH + bool "Goldfish (Virtual Platform)" + depends on X86_EXTENDED_PLATFORM + help + Enable support for the Goldfish virtual platform used primarily + for Android development. Unless you are building for the Android + Goldfish emulator say N here. + +config X86_INTEL_CE + bool "CE4100 TV platform" + depends on PCI + depends on PCI_GODIRECT + depends on X86_IO_APIC + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + select X86_REBOOTFIXUPS + select OF + select OF_EARLY_FLATTREE + help + Select for the Intel CE media processor (CE4100) SOC. + This option compiles in support for the CE4100 SOC for settop + boxes and media devices. + +config X86_INTEL_MID + bool "Intel MID platform support" + depends on X86_EXTENDED_PLATFORM + depends on X86_PLATFORM_DEVICES + depends on PCI + depends on X86_64 || (PCI_GOANY && X86_32) + depends on X86_IO_APIC + select I2C + select DW_APB_TIMER + select INTEL_SCU_PCI + help + Select to build a kernel capable of supporting Intel MID (Mobile + Internet Device) platform systems which do not have the PCI legacy + interfaces. If you are building for a PC class system say N here. + + Intel MID platforms are based on an Intel processor and chipset which + consume less power than most of the x86 derivatives. + +config X86_INTEL_QUARK + bool "Intel Quark platform support" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + depends on X86_PLATFORM_DEVICES + depends on X86_TSC + depends on PCI + depends on PCI_GOANY + depends on X86_IO_APIC + select IOSF_MBI + select INTEL_IMR + select COMMON_CLK + help + Select to include support for Quark X1000 SoC. + Say Y here if you have a Quark based system such as the Arduino + compatible Intel Galileo. + +config X86_INTEL_LPSS + bool "Intel Low Power Subsystem Support" + depends on X86 && ACPI && PCI + select COMMON_CLK + select PINCTRL + select IOSF_MBI + help + Select to build support for Intel Low Power Subsystem such as + found on Intel Lynxpoint PCH. Selecting this option enables + things like clock tree (common clock framework) and pincontrol + which are needed by the LPSS peripheral drivers. + +config X86_AMD_PLATFORM_DEVICE + bool "AMD ACPI2Platform devices support" + depends on ACPI + select COMMON_CLK + select PINCTRL + help + Select to interpret AMD specific ACPI device to platform device + such as I2C, UART, GPIO found on AMD Carrizo and later chipsets. + I2C and UART depend on COMMON_CLK to set clock. GPIO driver is + implemented under PINCTRL subsystem. + +config IOSF_MBI + tristate "Intel SoC IOSF Sideband support for SoC platforms" + depends on PCI + help + This option enables sideband register access support for Intel SoC + platforms. On these platforms the IOSF sideband is used in lieu of + MSR's for some register accesses, mostly but not limited to thermal + and power. Drivers may query the availability of this device to + determine if they need the sideband in order to work on these + platforms. The sideband is available on the following SoC products. + This list is not meant to be exclusive. + - BayTrail + - Braswell + - Quark + + You should say Y if you are running a kernel on one of these SoC's. + +config IOSF_MBI_DEBUG + bool "Enable IOSF sideband access through debugfs" + depends on IOSF_MBI && DEBUG_FS + help + Select this option to expose the IOSF sideband access registers (MCR, + MDR, MCRX) through debugfs to write and read register information from + different units on the SoC. This is most useful for obtaining device + state information for debug and analysis. As this is a general access + mechanism, users of this option would have specific knowledge of the + device they want to access. + + If you don't require the option or are in doubt, say N. + +config X86_RDC321X + bool "RDC R-321x SoC" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + select M486 + select X86_REBOOTFIXUPS + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + +config X86_32_NON_STANDARD + bool "Support non-standard 32-bit SMP architectures" + depends on X86_32 && SMP + depends on X86_EXTENDED_PLATFORM + help + This option compiles in the bigsmp and STA2X11 default + subarchitectures. It is intended for a generic binary + kernel. If you select them all, kernel will probe it one by + one and will fallback to default. + +# Alphabetically sorted list of Non standard 32 bit platforms + +config X86_SUPPORTS_MEMORY_FAILURE + def_bool y + # MCE code calls memory_failure(): + depends on X86_MCE + # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: + # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: + depends on X86_64 || !SPARSEMEM + select ARCH_SUPPORTS_MEMORY_FAILURE + +config STA2X11 + bool "STA2X11 Companion Chip Support" + depends on X86_32_NON_STANDARD && PCI + select SWIOTLB + select MFD_STA2X11 + select GPIOLIB + help + This adds support for boards based on the STA2X11 IO-Hub, + a.k.a. "ConneXt". The chip is used in place of the standard + PC chipset, so all "standard" peripherals are missing. If this + option is selected the kernel will still be able to boot on + standard PC machines. + +config X86_32_IRIS + tristate "Eurobraille/Iris poweroff module" + depends on X86_32 + help + The Iris machines from EuroBraille do not have APM or ACPI support + to shut themselves down properly. A special I/O sequence is + needed to do so, which is what this module does at + kernel shutdown. + + This is only for Iris machines from EuroBraille. + + If unused, say N. + +config SCHED_OMIT_FRAME_POINTER + def_bool y + prompt "Single-depth WCHAN output" + depends on X86 + help + Calculate simpler /proc//wchan values. If this option + is disabled then wchan values will recurse back to the + caller function. This provides more accurate wchan values, + at the expense of slightly more scheduling overhead. + + If in doubt, say "Y". + +menuconfig HYPERVISOR_GUEST + bool "Linux guest support" + help + Say Y here to enable options for running Linux under various hyper- + visors. This option enables basic hypervisor detection and platform + setup. + + If you say N, all options in this submenu will be skipped and + disabled, and Linux guest support won't be built in. + +if HYPERVISOR_GUEST + +config PARAVIRT + bool "Enable paravirtualization code" + depends on HAVE_STATIC_CALL + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + +config PARAVIRT_XXL + bool + +config PARAVIRT_DEBUG + bool "paravirt-ops debugging" + depends on PARAVIRT && DEBUG_KERNEL + help + Enable to debug paravirt_ops internals. Specifically, BUG if + a paravirt_op is missing when it is called. + +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP + help + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM / Xen kernels. + + If you are unsure how to answer this question, answer Y. + +config X86_HV_CALLBACK_VECTOR + def_bool n + +source "arch/x86/xen/Kconfig" + +config KVM_GUEST + bool "KVM Guest support (including kvmclock)" + depends on PARAVIRT + select PARAVIRT_CLOCK + select ARCH_CPUIDLE_HALTPOLL + select X86_HV_CALLBACK_VECTOR + default y + help + This option enables various optimizations for running under the KVM + hypervisor. It includes a paravirtualized clock, so that instead + of relying on a PIT (or probably other) emulation by the + underlying device model, the host provides the guest with + timing infrastructure such as time of day, and system time + +config ARCH_CPUIDLE_HALTPOLL + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help + If virtualized under KVM, disable host haltpoll. + +config PVH + bool "Support for running PVH guests" + help + This option enables the PVH entry point for guest virtual machines + as specified in the x86/HVM direct boot ABI. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + +config PARAVIRT_CLOCK + bool + +config JAILHOUSE_GUEST + bool "Jailhouse non-root cell support" + depends on X86_64 && PCI + select X86_PM_TIMER + help + This option allows to run Linux as guest in a Jailhouse non-root + cell. You can leave this option disabled if you only want to start + Jailhouse and run Linux afterwards in the root cell. + +config ACRN_GUEST + bool "ACRN Guest support" + depends on X86_64 + select X86_HV_CALLBACK_VECTOR + help + This option allows to run Linux as guest in the ACRN hypervisor. ACRN is + a flexible, lightweight reference open-source hypervisor, built with + real-time and safety-criticality in mind. It is built for embedded + IOT with small footprint and real-time features. More details can be + found in https://projectacrn.org/. + +config INTEL_TDX_GUEST + bool "Intel TDX (Trust Domain Extensions) - Guest Support" + depends on X86_64 && CPU_SUP_INTEL + depends on X86_X2APIC + select ARCH_HAS_CC_PLATFORM + select X86_MEM_ENCRYPT + select X86_MCE + help + Support running as a guest under Intel TDX. Without this support, + the guest kernel can not boot or run under TDX. + TDX includes memory encryption and integrity capabilities + which protect the confidentiality and integrity of guest + memory contents and CPU state. TDX guests are protected from + some attacks from the VMM. + +endif # HYPERVISOR_GUEST + +source "arch/x86/Kconfig.cpu" + +config HPET_TIMER + def_bool X86_64 + prompt "HPET Timer Support" if X86_32 + help + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. The interface used is documented + in the HPET spec, revision 1. + + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. + + Choose N to continue using the legacy 8254 timer. + +config HPET_EMULATE_RTC + def_bool y + depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) + +# Mark as expert because too many people got it wrong. +# The code disables itself when not needed. +config DMI + default y + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK + bool "Enable DMI scanning" if EXPERT + help + Enabled scanning of DMI to identify machine quirks. Say Y + here unless you have verified that your setup is not + affected by entries in the DMI blacklist. Required by PNP + BIOS code. + +config GART_IOMMU + bool "Old AMD GART IOMMU support" + select DMA_OPS + select IOMMU_HELPER + select SWIOTLB + depends on X86_64 && PCI && AMD_NB + help + Provides a driver for older AMD Athlon64/Opteron/Turion/Sempron + GART based hardware IOMMUs. + + The GART supports full DMA access for devices with 32-bit access + limitations, on systems with more than 3 GB. This is usually needed + for USB, sound, many IDE/SATA chipsets and some other devices. + + Newer systems typically have a modern AMD IOMMU, supported via + the CONFIG_AMD_IOMMU=y config option. + + In normal configurations this driver is only active when needed: + there's more than 3 GB of memory and the system contains a + 32-bit limited device. + + If unsure, say Y. + +config BOOT_VESA_SUPPORT + bool + help + If true, at least one selected framebuffer driver can take advantage + of VESA video modes set at an early boot stage via the vga= parameter. + +config MAXSMP + bool "Enable Maximum number of SMP Processors and NUMA Nodes" + depends on X86_64 && SMP && DEBUG_KERNEL + select CPUMASK_OFFSTACK + help + Enable maximum number of CPUS and NUMA Nodes for this architecture. + If unsure, say N. + +# +# The maximum number of CPUs supported: +# +# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT, +# and which can be configured interactively in the +# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range. +# +# The ranges are different on 32-bit and 64-bit kernels, depending on +# hardware capabilities and scalability features of the kernel. +# +# ( If MAXSMP is enabled we just use the highest possible value and disable +# interactive configuration. ) +# + +config NR_CPUS_RANGE_BEGIN + int + default NR_CPUS_RANGE_END if MAXSMP + default 1 if !SMP + default 2 + +config NR_CPUS_RANGE_END + int + depends on X86_32 + default 64 if SMP && X86_BIGSMP + default 8 if SMP && !X86_BIGSMP + default 1 if !SMP + +config NR_CPUS_RANGE_END + int + depends on X86_64 + default 8192 if SMP && CPUMASK_OFFSTACK + default 512 if SMP && !CPUMASK_OFFSTACK + default 1 if !SMP + +config NR_CPUS_DEFAULT + int + depends on X86_32 + default 32 if X86_BIGSMP + default 8 if SMP + default 1 if !SMP + +config NR_CPUS_DEFAULT + int + depends on X86_64 + default 8192 if MAXSMP + default 64 if SMP + default 1 if !SMP + +config NR_CPUS + int "Maximum number of CPUs" if SMP && !MAXSMP + range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END + default NR_CPUS_DEFAULT + help + This allows you to specify the maximum number of CPUs which this + kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum + supported value is 8192, otherwise the maximum value is 512. The + minimum value which makes sense is 2. + + This is purely to save memory: each supported CPU adds about 8KB + to the kernel image. + +config SCHED_CLUSTER + bool "Cluster scheduler support" + depends on SMP + default y + help + Cluster scheduler support improves the CPU scheduler's decision + making when dealing with machines that have clusters of CPUs. + Cluster usually means a couple of CPUs which are placed closely + by sharing mid-level caches, last-level cache tags or internal + busses. + +config SCHED_SMT + def_bool y if SMP + +config SCHED_MC + def_bool y + prompt "Multi-core scheduler support" + depends on SMP + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_MC_PRIO + bool "CPU core priorities scheduler support" + depends on SCHED_MC && CPU_SUP_INTEL + select X86_INTEL_PSTATE + select CPU_FREQ + default y + help + Intel Turbo Boost Max Technology 3.0 enabled CPUs have a + core ordering determined at manufacturing time, which allows + certain cores to reach higher turbo frequencies (when running + single threaded workloads) than others. + + Enabling this kernel feature teaches the scheduler about + the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the + scheduler's CPU selection logic accordingly, so that higher + overall system performance can be achieved. + + This feature will have no effect on CPUs without this feature. + + If unsure say Y here. + +config UP_LATE_INIT + def_bool y + depends on !SMP && X86_LOCAL_APIC + +config X86_UP_APIC + bool "Local APIC support on uniprocessors" if !PCI_MSI + default PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD + help + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. If you have a single-CPU + system which has a processor with a local APIC, you can say Y here to + enable and use it. If you say Y here even though your machine doesn't + have a local APIC, then the kernel will still run with no slowdown at + all. The local APIC supports CPU-generated self-interrupts (timer, + performance counters), and the NMI watchdog which detects hard + lockups. + +config X86_UP_IOAPIC + bool "IO-APIC support on uniprocessors" + depends on X86_UP_APIC + help + An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an + SMP-capable replacement for PC-style interrupt controllers. Most + SMP systems and many recent uniprocessor systems have one. + + If you have a single-CPU system with an IO-APIC, you can say Y here + to use it. If you say Y here even though your machine doesn't have + an IO-APIC, then the kernel will still run with no slowdown at all. + +config X86_LOCAL_APIC + def_bool y + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI + select IRQ_DOMAIN_HIERARCHY + select PCI_MSI_IRQ_DOMAIN if PCI_MSI + +config X86_IO_APIC + def_bool y + depends on X86_LOCAL_APIC || X86_UP_IOAPIC + +config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + depends on X86_IO_APIC + help + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded + interrupt handling is used on systems where the generation of + superfluous "boot interrupts" cannot be disabled. + + Some chipsets generate a legacy INTx "boot IRQ" when the IRQ + entry in the chipset's IO-APIC is masked (as, e.g. the RT + kernel does during interrupt handling). On chipsets where this + boot IRQ generation cannot be disabled, this workaround keeps + the original IRQ line masked so that only the equivalent "boot + IRQ" is delivered to the CPUs. The workaround also tells the + kernel to set up the IRQ handler on the boot IRQ line. In this + way only one interrupt is delivered to the kernel. Otherwise + the spurious second interrupt may cause the kernel to bring + down (vital) interrupt lines. + + Only affects "broken" chipsets. Interrupt sharing may be + increased on these systems. + +config X86_MCE + bool "Machine Check / overheating reporting" + select GENERIC_ALLOCATOR + default y + help + Machine Check support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, data corruption). + The action the kernel takes depends on the severity of the problem, + ranging from warning messages to halting the machine. + +config X86_MCELOG_LEGACY + bool "Support for deprecated /dev/mcelog character device" + depends on X86_MCE + help + Enable support for /dev/mcelog which is needed by the old mcelog + userspace logging daemon. Consider switching to the new generation + rasdaemon solution. + +config X86_MCE_INTEL + def_bool y + prompt "Intel MCE features" + depends on X86_MCE && X86_LOCAL_APIC + help + Additional support for intel specific MCE features such as + the thermal monitor. + +config X86_MCE_AMD + def_bool y + prompt "AMD MCE features" + depends on X86_MCE && X86_LOCAL_APIC && AMD_NB + help + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + +config X86_ANCIENT_MCE + bool "Support for old Pentium 5 / WinChip machine checks" + depends on X86_32 && X86_MCE + help + Include support for machine check handling on old Pentium 5 or WinChip + systems. These typically need to be enabled explicitly on the command + line. + +config X86_MCE_THRESHOLD + depends on X86_MCE_AMD || X86_MCE_INTEL + def_bool y + +config X86_MCE_INJECT + depends on X86_MCE && X86_LOCAL_APIC && DEBUG_FS + tristate "Machine check injector support" + help + Provide support for injecting machine checks for testing purposes. + If you don't know what a machine check is and you don't do kernel + QA it is safe to say n. + +source "arch/x86/events/Kconfig" + +config X86_LEGACY_VM86 + bool "Legacy VM86 support" + depends on X86_32 + help + This option allows user programs to put the CPU into V8086 + mode, which is an 80286-era approximation of 16-bit real mode. + + Some very old versions of X and/or vbetool require this option + for user mode setting. Similarly, DOSEMU will use it if + available to accelerate real mode DOS programs. However, any + recent version of DOSEMU, X, or vbetool should be fully + functional even without kernel VM86 support, as they will all + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. + + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. + + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. + + If unsure, say N here. + +config VM86 + bool + default X86_LEGACY_VM86 + +config X86_16BIT + bool "Enable support for 16-bit segments" if EXPERT + default y + depends on MODIFY_LDT_SYSCALL + help + This option is required by programs like Wine to run 16-bit + protected mode legacy code on x86 processors. Disabling + this option saves about 300 bytes on i386, or around 6K text + plus 16K runtime memory on x86-64, + +config X86_ESPFIX32 + def_bool y + depends on X86_16BIT && X86_32 + +config X86_ESPFIX64 + def_bool y + depends on X86_16BIT && X86_64 + +config X86_VSYSCALL_EMULATION + bool "Enable vsyscall emulation" if EXPERT + default y + depends on X86_64 + help + This enables emulation of the legacy vsyscall page. Disabling + it is roughly equivalent to booting with vsyscall=none, except + that it will also disable the helpful warning if a program + tries to use a vsyscall. With this option set to N, offending + programs will just segfault, citing addresses of the form + 0xffffffffff600?00. + + This option is required by many programs built before 2013, and + care should be used even with newer programs if set to N. + + Disabling this option saves about 7K of kernel size and + possibly 4K of additional runtime pagetable memory. + +config X86_IOPL_IOPERM + bool "IOPERM and IOPL Emulation" + default y + help + This enables the ioperm() and iopl() syscalls which are necessary + for legacy applications. + + Legacy IOPL support is an overbroad mechanism which allows user + space aside of accessing all 65536 I/O ports also to disable + interrupts. To gain this access the caller needs CAP_SYS_RAWIO + capabilities and permission from potentially active security + modules. + + The emulation restricts the functionality of the syscall to + only allowing the full range I/O port access, but prevents the + ability to disable interrupts from user space which would be + granted if the hardware IOPL mechanism would be used. + +config TOSHIBA + tristate "Toshiba Laptop support" + depends on X86_32 + help + This adds a driver to safely access the System Management Mode of + the CPU on Toshiba portables with a genuine Toshiba BIOS. It does + not work on models with a Phoenix BIOS. The System Management Mode + is used to set the BIOS and power saving options on Toshiba portables. + + For information on utilities to make use of this driver see the + Toshiba Linux utilities web site at: + . + + Say Y if you intend to run this kernel on a Toshiba portable. + Say N otherwise. + +config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" + depends on X86_32 + help + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on + some combinations of hardware and BIOS. The symptom, for which + this config is intended, is when reboot ends with a stalled/hung + system. + + Currently, the only fixup is for the Geode machines using + CS5530A and CS5536 chipsets and the RDC R-321x SoC. + + Say Y if you want to enable the fixup. Currently, it's safe to + enable this option even if you don't need it. + Say N otherwise. + +config MICROCODE + bool "CPU microcode loading support" + default y + depends on CPU_SUP_AMD || CPU_SUP_INTEL + help + If you say Y here, you will be able to update the microcode on + Intel and AMD processors. The Intel support is for the IA32 family, + e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The + AMD support is for families 0x10 and later. You will obviously need + the actual microcode binary data itself which is not shipped with + the Linux kernel. + + The preferred method to load microcode from a detached initrd is described + in Documentation/x86/microcode.rst. For that you need to enable + CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the + initrd for microcode blobs. + + In addition, you can build the microcode into the kernel. For that you + need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE + config option. + +config MICROCODE_INTEL + bool "Intel microcode loading support" + depends on CPU_SUP_INTEL && MICROCODE + default MICROCODE + help + This options enables microcode patch loading support for Intel + processors. + + For the current Intel microcode data package go to + and search for + 'Linux Processor Microcode Data File'. + +config MICROCODE_AMD + bool "AMD microcode loading support" + depends on CPU_SUP_AMD && MICROCODE + help + If you select this option, microcode patch loading support for AMD + processors will be enabled. + +config MICROCODE_LATE_LOADING + bool "Late microcode loading (DANGEROUS)" + default n + depends on MICROCODE + help + Loading microcode late, when the system is up and executing instructions + is a tricky business and should be avoided if possible. Just the sequence + of synchronizing all cores and SMT threads is one fragile dance which does + not guarantee that cores might not softlock after the loading. Therefore, + use this at your own risk. Late loading taints the kernel too. + +config X86_MSR + tristate "/dev/cpu/*/msr - Model-specific register support" + help + This device gives privileged processes access to the x86 + Model-Specific Registers (MSRs). It is a character device with + major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. + MSR accesses are directed to a specific CPU on multi-processor + systems. + +config X86_CPUID + tristate "/dev/cpu/*/cpuid - CPU information support" + help + This device gives processes access to the x86 CPUID instruction to + be executed on a specific processor. It is a character device + with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to + /dev/cpu/31/cpuid. + +choice + prompt "High Memory Support" + default HIGHMEM4G + depends on X86_32 + +config NOHIGHMEM + bool "off" + help + Linux can use up to 64 Gigabytes of physical memory on x86 systems. + However, the address space of 32-bit x86 processors is only 4 + Gigabytes large. That means that, if you have a large amount of + physical memory, not all of it can be "permanently mapped" by the + kernel. The physical memory that's not permanently mapped is called + "high memory". + + If you are compiling a kernel which will never run on a machine with + more than 1 Gigabyte total physical RAM, answer "off" here (default + choice and suitable for most users). This will result in a "3GB/1GB" + split: 3GB are mapped so that each process sees a 3GB virtual memory + space and the remaining part of the 4GB virtual memory space is used + by the kernel to permanently map as much physical memory as + possible. + + If the machine has between 1 and 4 Gigabytes physical RAM, then + answer "4GB" here. + + If more than 4 Gigabytes is used then answer "64GB" here. This + selection turns Intel PAE (Physical Address Extension) mode on. + PAE implements 3-level paging on IA32 processors. PAE is fully + supported by Linux, PAE mode is implemented on all recent Intel + processors (Pentium Pro and better). NOTE: If you say "64GB" here, + then the kernel will not boot on CPUs that don't support PAE! + + The actual amount of total physical memory will either be + auto detected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time.) + + If unsure, say "off". + +config HIGHMEM4G + bool "4GB" + help + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +config HIGHMEM64G + bool "64GB" + depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6 + select X86_PAE + help + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +endchoice + +choice + prompt "Memory split" if EXPERT + default VMSPLIT_3G + depends on X86_32 + help + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + depends on !X86_PAE + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_2G_OPT + depends on !X86_PAE + bool "2G/2G user/kernel split (for full 2G low memory)" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G + default 0x78000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 + +config HIGHMEM + def_bool y + depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) + +config X86_PAE + bool "PAE (Physical Address Extension) Support" + depends on X86_32 && !HIGHMEM4G + select PHYS_ADDR_T_64BIT + select SWIOTLB + help + PAE is required for NX support, and furthermore enables + larger swapspace support for non-overcommit purposes. It + has the cost of more pagetable lookup overhead, and also + consumes more pagetable space per process. + +config X86_5LEVEL + bool "Enable 5-level page tables support" + default y + select DYNAMIC_MEMORY_LAYOUT + select SPARSEMEM_VMEMMAP + depends on X86_64 + help + 5-level paging enables access to larger address space: + upto 128 PiB of virtual address space and 4 PiB of + physical address space. + + It will be supported by future Intel CPUs. + + A kernel with the option enabled can be booted on machines that + support 4- or 5-level paging. + + See Documentation/x86/x86_64/5level-paging.rst for more + information. + + Say N if unsure. + +config X86_DIRECT_GBPAGES + def_bool y + depends on X86_64 + help + Certain kernel features effectively disable kernel + linear 1 GB mappings (even if the CPU otherwise + supports them), so don't confuse the user by printing + that we have them enabled. + +config X86_CPA_STATISTICS + bool "Enable statistic for Change Page Attribute" + depends on DEBUG_FS + help + Expose statistics about the Change Page Attribute mechanism, which + helps to determine the effectiveness of preserving large and huge + page mappings when mapping protections are changed. + +config X86_MEM_ENCRYPT + select ARCH_HAS_FORCE_DMA_UNENCRYPTED + select DYNAMIC_PHYSICAL_MASK + def_bool n + +config AMD_MEM_ENCRYPT + bool "AMD Secure Memory Encryption (SME) support" + depends on X86_64 && CPU_SUP_AMD + select DMA_COHERENT_POOL + select ARCH_USE_MEMREMAP_PROT + select INSTRUCTION_DECODER + select ARCH_HAS_CC_PLATFORM + select X86_MEM_ENCRYPT + help + Say yes to enable support for the encryption of system memory. + This requires an AMD processor that supports Secure Memory + Encryption (SME). + +config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT + bool "Activate AMD Secure Memory Encryption (SME) by default" + depends on AMD_MEM_ENCRYPT + help + Say yes to have system memory encrypted by default if running on + an AMD processor that supports Secure Memory Encryption (SME). + + If set to Y, then the encryption of system memory can be + deactivated with the mem_encrypt=off command line option. + + If set to N, then the encryption of system memory can be + activated with the mem_encrypt=on command line option. + +# Common NUMA Features +config NUMA + bool "NUMA Memory Allocation and Scheduler Support" + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) + default y if X86_BIGSMP + select USE_PERCPU_NUMA_NODE_ID + help + Enable NUMA (Non-Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. + + For 64-bit this is recommended if the system is Intel Core i7 + (or later), AMD Opteron, or EM64T NUMA. + + For 32-bit this is only needed if you boot a 32-bit + kernel on a 64-bit NUMA platform. + + Otherwise, you should say N. + +config AMD_NUMA + def_bool y + prompt "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + help + Enable AMD NUMA node topology detection. You should say Y here if + you have a multi processor AMD system. This uses an old method to + read the NUMA configuration directly from the builtin Northbridge + of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, + which also takes priority if both are compiled in. + +config X86_64_ACPI_NUMA + def_bool y + prompt "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA + help + Enable ACPI SRAT based node topology detection. + +config NUMA_EMU + bool "NUMA emulation" + depends on NUMA + help + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + +config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP + range 1 10 + default "10" if MAXSMP + default "6" if X86_64 + default "3" + depends on NUMA + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accommodate various tables. + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on X86_32 && !NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +config ARCH_SPARSEMEM_DEFAULT + def_bool X86_64 || (NUMA && X86_32) + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE + +config ARCH_MEMORY_PROBE + bool "Enable sysfs memory/probe interface" + depends on MEMORY_HOTPLUG + help + This option enables a sysfs memory/probe interface for testing. + See Documentation/admin-guide/mm/memory-hotplug.rst for more information. + If you are unsure how to answer this question, answer N. + +config ARCH_PROC_KCORE_TEXT + def_bool y + depends on X86_64 && PROC_KCORE + +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + +config X86_PMEM_LEGACY_DEVICE + bool + +config X86_PMEM_LEGACY + tristate "Support non-standard NVDIMMs and ADR protected memory" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select X86_PMEM_LEGACY_DEVICE + select NUMA_KEEP_MEMINFO if NUMA + select LIBNVDIMM + help + Treat memory marked using the non-standard e820 type of 12 as used + by the Intel Sandy Bridge-EP reference BIOS as protected memory. + The kernel will offer these regions to the 'pmem' driver so + they can be used for persistent storage. + + Say Y if unsure. + +config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on HIGHMEM + help + The VM uses one page table entry for each page of physical memory. + For systems with a lot of RAM, this can be wasteful of precious + low memory. Setting this option will put user-space page table + entries in high memory. + +config X86_CHECK_BIOS_CORRUPTION + bool "Check for low memory corruption" + help + Periodically check for memory corruption in low memory, which + is suspected to be caused by BIOS. Even when enabled in the + configuration, it is disabled at runtime. Enable it by + setting "memory_corruption_check=1" on the kernel command + line. By default it scans the low 64k of memory every 60 + seconds; see the memory_corruption_check_size and + memory_corruption_check_period parameters in + Documentation/admin-guide/kernel-parameters.rst to adjust this. + + When enabled with the default parameters, this option has + almost no overhead, as it reserves a relatively small amount + of memory and scans it infrequently. It both detects corruption + and prevents it from affecting the running system. + + It is, however, intended as a diagnostic tool; if repeatable + BIOS-originated corruption always affects the same memory, + you can use memmap= to prevent the kernel from using that + memory. + +config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK + bool "Set the default setting of memory_corruption_check" + depends on X86_CHECK_BIOS_CORRUPTION + default y + help + Set whether the default state of memory_corruption_check is + on or off. + +config MATH_EMULATION + bool + depends on MODIFY_LDT_SYSCALL + prompt "Math emulation" if X86_32 && (M486SX || MELAN) + help + Linux can emulate a math coprocessor (used for floating point + operations) if you don't have one. 486DX and Pentium processors have + a math coprocessor built in, 486SX and 386 do not, unless you added + a 487DX or 387, respectively. (The messages during boot time can + give you some hints here ["man dmesg"].) Everyone needs either a + coprocessor or this emulation. + + If you don't have a math coprocessor, you need to say Y here; if you + say Y here even though you have a coprocessor, the coprocessor will + be used nevertheless. (This behavior can be changed with the kernel + command line option "no387", which comes handy if your coprocessor + is broken. Try "man bootparam" or see the documentation of your boot + loader (lilo or loadlin) about how to pass options to the kernel at + boot time.) This means that it is a good idea to say Y here if you + intend to use this kernel on different machines. + + More information about the internals of the Linux math coprocessor + emulation can be found in . + + If you are not sure, say Y; apart from resulting in a 66 KB bigger + kernel, it won't hurt. + +config MTRR + def_bool y + prompt "MTRR (Memory Type Range Register) support" if EXPERT + help + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control + processor access to memory ranges. This is most useful if you have + a video (VGA) card on a PCI or AGP bus. Enabling write-combining + allows bus write transfers to be combined into a larger transfer + before bursting over the PCI/AGP bus. This can increase performance + of image write operations 2.5 times or more. Saying Y here creates a + /proc/mtrr file which may be used to manipulate your processor's + MTRRs. Typically the X server should use this. + + This code has a reasonably generic interface so that similar + control registers on other processors can be easily supported + as well: + + The Cyrix 6x86, 6x86MX and M II processors have Address Range + Registers (ARRs) which provide a similar functionality to MTRRs. For + these, the ARRs are used to emulate the MTRRs. + The AMD K6-2 (stepping 8 and above) and K6-3 processors have two + MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing + write-combining. All of these processors are supported by this code + and it makes sense to say Y here if you have one of them. + + Saying Y here also fixes a problem with buggy SMP BIOSes which only + set the MTRRs for the boot CPU and not for the secondary CPUs. This + can lead to all sorts of problems, so it's good to say Y here. + + You can safely say Y even if your machine doesn't have MTRRs, you'll + just add about 9 KB to your kernel. + + See for more information. + +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so X drivers can + add writeback entries. + + Can be disabled with disable_mtrr_cleanup on the kernel command line. + The largest mtrr entry size for a continuous block can be set with + mtrr_chunk_size. + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + int "MTRR cleanup enable value (0-1)" + range 0 1 + default "0" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup default value + +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT + int "MTRR cleanup spare reg num (0-7)" + range 0 7 + default "1" + depends on MTRR_SANITIZER + help + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr=N on the kernel command line. + +config X86_PAT + def_bool y + prompt "x86 PAT support" if EXPERT + depends on MTRR + help + Use PAT attributes to setup page level cache control. + + PATs are the modern equivalents of MTRRs and are much more + flexible than MTRRs. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots) or a non-working video driver. + + If unsure, say Y. + +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + +config X86_UMIP + def_bool y + prompt "User Mode Instruction Prevention" if EXPERT + help + User Mode Instruction Prevention (UMIP) is a security feature in + some x86 processors. If enabled, a general protection fault is + issued if the SGDT, SLDT, SIDT, SMSW or STR instructions are + executed in user mode. These instructions unnecessarily expose + information about the hardware state. + + The vast majority of applications do not use these instructions. + For the very few that do, software emulation is provided in + specific cases in protected and virtual-8086 modes. Emulated + results are dummy. + +config CC_HAS_IBT + # GCC >= 9 and binutils >= 2.29 + # Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654 + # Clang/LLVM >= 14 + # https://github.com/llvm/llvm-project/commit/e0b89df2e0f0130881bf6c39bf31d7f6aac00e0f + # https://github.com/llvm/llvm-project/commit/dfcf69770bc522b9e411c66454934a37c1f35332 + def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \ + (CC_IS_CLANG && CLANG_VERSION >= 140000)) && \ + $(as-instr,endbr64) + +config X86_KERNEL_IBT + prompt "Indirect Branch Tracking" + bool + depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL + # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f + depends on !LD_IS_LLD || LLD_VERSION >= 140000 + select OBJTOOL + help + Build the kernel with support for Indirect Branch Tracking, a + hardware support course-grain forward-edge Control Flow Integrity + protection. It enforces that all indirect calls must land on + an ENDBR instruction, as such, the compiler will instrument the + code with them to make this happen. + + In addition to building the kernel with IBT, seal all functions that + are not indirect call targets, avoiding them ever becoming one. + + This requires LTO like objtool runs and will slow down the build. It + does significantly reduce the number of ENDBR instructions in the + kernel image. + +config X86_INTEL_MEMORY_PROTECTION_KEYS + prompt "Memory Protection Keys" + def_bool y + # Note: only available in 64-bit mode + depends on X86_64 && (CPU_SUP_INTEL || CPU_SUP_AMD) + select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_HAS_PKEYS + help + Memory Protection Keys provides a mechanism for enforcing + page-based protections, but without requiring modification of the + page tables when an application changes protection domains. + + For details, see Documentation/core-api/protection-keys.rst + + If unsure, say y. + +choice + prompt "TSX enable mode" + depends on CPU_SUP_INTEL + default X86_INTEL_TSX_MODE_OFF + help + Intel's TSX (Transactional Synchronization Extensions) feature + allows to optimize locking protocols through lock elision which + can lead to a noticeable performance boost. + + On the other hand it has been shown that TSX can be exploited + to form side channel attacks (e.g. TAA) and chances are there + will be more of those attacks discovered in the future. + + Therefore TSX is not enabled by default (aka tsx=off). An admin + might override this decision by tsx=on the command line parameter. + Even with TSX enabled, the kernel will attempt to enable the best + possible TAA mitigation setting depending on the microcode available + for the particular machine. + + This option allows to set the default tsx mode between tsx=on, =off + and =auto. See Documentation/admin-guide/kernel-parameters.txt for more + details. + + Say off if not sure, auto if TSX is in use but it should be used on safe + platforms or on if TSX is in use and the security aspect of tsx is not + relevant. + +config X86_INTEL_TSX_MODE_OFF + bool "off" + help + TSX is disabled if possible - equals to tsx=off command line parameter. + +config X86_INTEL_TSX_MODE_ON + bool "on" + help + TSX is always enabled on TSX capable HW - equals the tsx=on command + line parameter. + +config X86_INTEL_TSX_MODE_AUTO + bool "auto" + help + TSX is enabled on TSX capable HW that is believed to be safe against + side channel attacks- equals the tsx=auto command line parameter. +endchoice + +config X86_SGX + bool "Software Guard eXtensions (SGX)" + depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + select SRCU + select MMU_NOTIFIER + select NUMA_KEEP_MEMINFO if NUMA + select XARRAY_MULTI + help + Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions + that can be used by applications to set aside private regions of code + and data, referred to as enclaves. An enclave's private memory can + only be accessed by code running within the enclave. Accesses from + outside the enclave, including other enclaves, are disallowed by + hardware. + + If unsure, say N. + +config EFI + bool "EFI runtime service support" + depends on ACPI + select UCS2_STRING + select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT + help + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). + + This option is only useful on systems that have EFI firmware. + In addition, you should use the latest ELILO loader available + at in order to take advantage + of EFI runtime services. However, even with this option, the + resultant kernel should continue to boot on existing non-EFI + platforms. + +config EFI_STUB + bool "EFI stub support" + depends on EFI + select RELOCATABLE + help + This kernel feature allows a bzImage to be loaded directly + by EFI firmware without the use of a bootloader. + + See Documentation/admin-guide/efi-stub.rst for more information. + +config EFI_MIXED + bool "EFI mixed-mode support" + depends on EFI_STUB && X86_64 + help + Enabling this feature allows a 64-bit kernel to be booted + on a 32-bit firmware, provided that your CPU supports 64-bit + mode. + + Note that it is not possible to boot a mixed-mode enabled + kernel via the EFI boot stub - a bootloader that supports + the EFI handover protocol must be used. + + If unsure, say N. + +source "kernel/Kconfig.hz" + +config KEXEC + bool "kexec system call" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. + +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select HAVE_IMA_KEXEC if IMA + depends on X86_64 + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + help + This is new version of kexec system call. This system call is + file based and takes file descriptors as system call argument + for kernel and initramfs as opposed to list of segments as + accepted by previous system call. + +config ARCH_HAS_KEXEC_PURGATORY + def_bool KEXEC_FILE + +config KEXEC_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC_FILE + help + + This option makes the kexec_file_load() syscall check for a valid + signature of the kernel image. The image can still be loaded without + a valid signature unless you also enable KEXEC_SIG_FORCE, though if + there's a signature that we can check, then it must be valid. + + In addition to this option, you need to enable signature + verification for the corresponding kernel image type being + loaded in order for this to work. + +config KEXEC_SIG_FORCE + bool "Require a valid signature in kexec_file_load() syscall" + depends on KEXEC_SIG + help + This option makes kernel signature verification mandatory for + the kexec_file_load() syscall. + +config KEXEC_BZIMAGE_VERIFY_SIG + bool "Enable bzImage signature verification support" + depends on KEXEC_SIG + depends on SIGNED_PE_FILE_VERIFICATION + select SYSTEM_TRUSTED_KEYRING + help + Enable bzImage signature verification support. + +config CRASH_DUMP + bool "kernel crash dumps" + depends on X86_64 || (X86_32 && HIGHMEM) + help + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or BIOS using + PHYSICAL_START, or it must be built as a relocatable image + (CONFIG_RELOCATABLE=y). + For more details see Documentation/admin-guide/kdump/kdump.rst + +config KEXEC_JUMP + bool "kexec jump" + depends on KEXEC && HIBERNATION + help + Jump between original kernel and kexeced kernel and invoke + code in physical address mode via KEXEC + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) + default "0x1000000" + help + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then + bzImage will decompress itself to above physical address and + run from there. Otherwise, bzImage will run from the address where + it has been loaded by the boot loader and will ignore above physical + address. + + In normal kdump cases one does not have to set/change this option + as now bzImage can be compiled as a completely relocatable image + (CONFIG_RELOCATABLE=y) and be used to load and run from a different + address. This option is mainly useful for the folks who don't want + to use a bzImage for capturing the crash dump and want to use a + vmlinux instead. vmlinux is not relocatable hence a kernel needs + to be specifically compiled to run from a specific memory area + (normally a reserved region) and this option comes handy. + + So if you are using bzImage for capturing the crash dump, + leave the value here unchanged to 0x1000000 and set + CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux + for capturing the crash dump change this value to start of + the reserved region. In other words, it can be set based on + the "X" value as specified in the "crashkernel=YM@XM" + command line boot parameter passed to the panic-ed + kernel. Please take a look at Documentation/admin-guide/kdump/kdump.rst + for more details about crash dumps. + + Usage of bzImage for capturing the crash dump is recommended as + one does not have to build two kernels. Same kernel can be used + as production kernel and capture kernel. Above option should have + gone away after relocatable bzImage support is introduced. But it + is present because there are users out there who continue to use + vmlinux for dump capture. This option should go away down the + line. + + Don't change this unless you know what you are doing. + +config RELOCATABLE + bool "Build a relocatable kernel" + default y + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to make the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical address + (CONFIG_PHYSICAL_START) is used as the minimum location. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image (KASLR)" + depends on RELOCATABLE + default y + help + In support of Kernel Address Space Layout Randomization (KASLR), + this randomizes the physical address at which the kernel image + is decompressed and the virtual address where the kernel + image is mapped, as a security feature that deters exploit + attempts relying on knowledge of the location of kernel + code internals. + + On 64-bit, the kernel physical and virtual addresses are + randomized separately. The physical address will be anywhere + between 16MB and the top of physical memory (up to 64TB). The + virtual address will be randomized from 16MB up to 1GB (9 bits + of entropy). Note that this also reduces the memory space + available to kernel modules from 1.5GB to 1GB. + + On 32-bit, the kernel physical and virtual addresses are + randomized together. They will be randomized from 16MB up to + 512MB (8 bits of entropy). + + Entropy is generated using the RDRAND instruction if it is + supported. If RDTSC is supported, its value is mixed into + the entropy pool as well. If neither RDRAND nor RDTSC are + supported, then entropy is read from the i8254 timer. The + usable entropy is limited by the kernel being built using + 2GB addressing, and that PHYSICAL_ALIGN must be at a + minimum of 2MB. As a result, only 10 bits of entropy are + theoretically possible, but the implementations are further + limited due to memory layouts. + + If unsure, say Y. + +# Relocation on x86 needs some additional build support +config X86_NEED_RELOCS + def_bool y + depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) + +config PHYSICAL_ALIGN + hex "Alignment value to which kernel should be aligned" + default "0x200000" + range 0x2000 0x1000000 if X86_32 + range 0x200000 0x1000000 if X86_64 + help + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is set, kernel will move itself to nearest + address aligned to above value and run from there. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is not set, kernel will ignore the run time + load address and decompress itself to the address it has been + compiled for and run from there. The address for which kernel is + compiled already meets above alignment restrictions. Hence the + end result is that kernel runs from a physical address meeting + above alignment restrictions. + + On 32-bit this value must be a multiple of 0x2000. On 64-bit + this value must be a multiple of 0x200000. + + Don't change this unless you know what you are doing. + +config DYNAMIC_MEMORY_LAYOUT + bool + help + This option makes base addresses of vmalloc and vmemmap as well as + __PAGE_OFFSET movable during boot. + +config RANDOMIZE_MEMORY + bool "Randomize the kernel memory sections" + depends on X86_64 + depends on RANDOMIZE_BASE + select DYNAMIC_MEMORY_LAYOUT + default RANDOMIZE_BASE + help + Randomizes the base virtual address of kernel memory sections + (physical memory mapping, vmalloc & vmemmap). This security feature + makes exploits relying on predictable memory locations less reliable. + + The order of allocations remains unchanged. Entropy is generated in + the same way as RANDOMIZE_BASE. Current implementation in the optimal + configuration have in average 30,000 different possible virtual + addresses for each memory section. + + If unsure, say Y. + +config RANDOMIZE_MEMORY_PHYSICAL_PADDING + hex "Physical memory mapping padding" if EXPERT + depends on RANDOMIZE_MEMORY + default "0xa" if MEMORY_HOTPLUG + default "0x0" + range 0x1 0x40 if MEMORY_HOTPLUG + range 0x0 0x40 + help + Define the padding in terabytes added to the existing physical + memory size during kernel memory randomization. It is useful + for memory hotplug support but reduces the entropy available for + address randomization. + + If unsure, leave at the default value. + +config HOTPLUG_CPU + def_bool y + depends on SMP + +config BOOTPARAM_HOTPLUG_CPU0 + bool "Set default setting of cpu0_hotpluggable" + depends on HOTPLUG_CPU + help + Set whether default state of cpu0_hotpluggable is on or off. + + Say Y here to enable CPU0 hotplug by default. If this switch + is turned on, there is no need to give cpu0_hotplug kernel + parameter and the CPU0 hotplug feature is enabled by default. + + Please note: there are two known CPU0 dependencies if you want + to enable the CPU0 hotplug feature either by this switch or by + cpu0_hotplug kernel parameter. + + First, resume from hibernate or suspend always starts from CPU0. + So hibernate and suspend are prevented if CPU0 is offline. + + Second dependency is PIC interrupts always go to CPU0. CPU0 can not + offline if any interrupt can not migrate out of CPU0. There may + be other CPU0 dependencies. + + Please make sure the dependencies are under your control before + you enable this feature. + + Say N if you don't want to enable CPU0 hotplug feature by default. + You still can enable the CPU0 hotplug feature at boot by kernel + parameter cpu0_hotplug. + +config DEBUG_HOTPLUG_CPU0 + def_bool n + prompt "Debug CPU0 hotplug" + depends on HOTPLUG_CPU + help + Enabling this option offlines CPU0 (if CPU0 can be offlined) as + soon as possible and boots up userspace with CPU0 offlined. User + can online CPU0 back after boot time. + + To debug CPU0 hotplug, you need to enable CPU0 offline/online + feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during + compilation or giving cpu0_hotplug kernel parameter at boot. + + If unsure, say N. + +config COMPAT_VDSO + def_bool n + prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" + depends on COMPAT_32 + help + Certain buggy versions of glibc will crash if they are + presented with a 32-bit vDSO that is not mapped at the address + indicated in its segment table. + + The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a + and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and + 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is + the only released version with the bug, but OpenSUSE 9 + contains a buggy "glibc 2.3.2". + + The symptom of the bug is that everything crashes on startup, saying: + dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed! + + Saying Y here changes the default value of the vdso32 boot + option from 1 to 0, which turns off the 32-bit vDSO entirely. + This works around the glibc bug but hurts performance. + + If unsure, say N: if you are compiling your own kernel, you + are unlikely to be using a buggy version of glibc. + +choice + prompt "vsyscall table for legacy applications" + depends on X86_64 + default LEGACY_VSYSCALL_XONLY + help + Legacy user code that does not know how to find the vDSO expects + to be able to issue three syscalls by calling fixed addresses in + kernel space. Since this location is not randomized with ASLR, + it can be used to assist security vulnerability exploitation. + + This setting can be changed at boot time via the kernel command + line parameter vsyscall=[emulate|xonly|none]. Emulate mode + is deprecated and can only be enabled using the kernel command + line. + + On a system with recent enough glibc (2.14 or newer) and no + static binaries, you can say None without a performance penalty + to improve security. + + If unsure, select "Emulate execution only". + + config LEGACY_VSYSCALL_XONLY + bool "Emulate execution only" + help + The kernel traps and emulates calls into the fixed vsyscall + address mapping and does not allow reads. This + configuration is recommended when userspace might use the + legacy vsyscall area but support for legacy binary + instrumentation of legacy code is not needed. It mitigates + certain uses of the vsyscall area as an ASLR-bypassing + buffer. + + config LEGACY_VSYSCALL_NONE + bool "None" + help + There will be no vsyscall mapping at all. This will + eliminate any risk of ASLR bypass due to the vsyscall + fixed address mapping. Attempts to use the vsyscalls + will be reported to dmesg, so that either old or + malicious userspace programs can be identified. + +endchoice + +config CMDLINE_BOOL + bool "Built-in kernel command line" + help + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the + kernel boot arguments with the kernel itself (that is, + to not rely on the boot loader to provide them.) + + To compile command line arguments into the kernel, + set this option to 'Y', then fill in the + boot arguments in CONFIG_CMDLINE. + + Systems with fully functional boot loaders (i.e. non-embedded) + should leave this option set to 'N'. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to + form the full kernel command line, when the system boots. + + However, you can use the CONFIG_CMDLINE_OVERRIDE option to + change this behavior. + + In most cases, the command line (whether built-in or provided + by the boot loader) should specify the device for the root + file system. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + depends on CMDLINE_BOOL && CMDLINE != "" + help + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + + This is used to work around broken boot loaders. This should + be set to 'N' under normal conditions. + +config MODIFY_LDT_SYSCALL + bool "Enable the LDT (local descriptor table)" if EXPERT + default y + help + Linux can allow user programs to install a per-process x86 + Local Descriptor Table (LDT) using the modify_ldt(2) system + call. This is required to run 16-bit or segmented code such as + DOSEMU or some Wine programs. It is also used by some very old + threading libraries. + + Enabling this feature adds a small amount of overhead to + context switches and increases the low-level kernel attack + surface. Disabling it removes the modify_ldt(2) system call. + + Saying 'N' here may make sense for embedded or server kernels. + +config STRICT_SIGALTSTACK_SIZE + bool "Enforce strict size checking for sigaltstack" + depends on DYNAMIC_SIGFRAME + help + For historical reasons MINSIGSTKSZ is a constant which became + already too small with AVX512 support. Add a mechanism to + enforce strict checking of the sigaltstack size against the + real size of the FPU frame. This option enables the check + by default. It can also be controlled via the kernel command + line option 'strict_sas_size' independent of this config + switch. Enabling it might break existing applications which + allocate a too small sigaltstack but 'work' because they + never get a signal delivered. + + Say 'N' unless you want to really enforce this check. + +source "kernel/livepatch/Kconfig" + +endmenu + +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + select OBJTOOL if HAVE_OBJTOOL + default y if X86_64 + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK && X86_64 + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD && X86_64 + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL && X86_64 + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config CPU_SRSO + bool "Mitigate speculative RAS overflow on AMD" + depends on CPU_SUP_AMD && X86_64 && RETHUNK + default y + help + Enable the SRSO mitigation needed on AMD Zen1-4 machines. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + select OBJTOOL if HAVE_OBJTOOL + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +config GDS_FORCE_MITIGATION + bool "Force GDS Mitigation" + depends on CPU_SUP_INTEL + default n + help + Gather Data Sampling (GDS) is a hardware vulnerability which allows + unprivileged speculative access to data which was previously stored in + vector registers. + + This option is equivalent to setting gather_data_sampling=force on the + command line. The microcode mitigation is used if present, otherwise + AVX is disabled as a mitigation. On affected systems that are missing + the microcode any userspace code that unconditionally uses AVX will + break with this option set. + + Setting this option on systems not vulnerable to GDS has no effect. + + If in doubt, say N. + +endif + +config ARCH_HAS_ADD_PAGES + def_bool y + depends on ARCH_ENABLE_MEMORY_HOTPLUG + +config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE + def_bool y + +menu "Power management and ACPI options" + +config ARCH_HIBERNATION_HEADER + def_bool y + depends on HIBERNATION + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +config X86_APM_BOOT + def_bool y + depends on APM + +menuconfig APM + tristate "APM (Advanced Power Management) BIOS support" + depends on X86_32 && PM_SLEEP + help + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + If you select "Y" here, you can disable actual use of the APM + BIOS by passing the "apm=off" option to the kernel at boot time. + + Note that the APM support is almost completely disabled for + machines with more than one CPU. + + In order to use APM, you will need supporting software. For location + and more information, read + and the Battery Powered Linux mini-HOWTO, available from + . + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + This driver does not support the TI 4000M TravelMate and the ACER + 486/DX4/75 because they don't have compliant BIOSes. Many "green" + desktop machines also don't have compliant BIOSes, and this driver + may cause those machines to panic during the boot phase. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + + Some other things you should try when experiencing seemingly random, + "weird" problems: + + 1) make sure that you have enough swap space and that it is + enabled. + 2) pass the "idle=poll" option to the kernel + 3) switch on floating point emulation in the kernel and pass + the "no387" option to the kernel + 4) pass the "floppy=nodma" option to the kernel + 5) pass the "mem=4M" option to the kernel (thereby disabling + all but the first 4 MB of RAM) + 6) make sure that the CPU is not over clocked. + 7) read the sig11 FAQ at + 8) disable the cache from your BIOS settings + 9) install a fan for the video card or exchange video RAM + 10) install a better fan for the CPU + 11) exchange RAM chips + 12) exchange the motherboard. + + To compile this driver as a module, choose M here: the + module will be called apm. + +if APM + +config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" + help + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. + +config APM_DO_ENABLE + bool "Enable PM at boot time" + help + Enable APM features at boot time. From page 36 of the APM BIOS + specification: "When disabled, the APM BIOS does not automatically + power manage devices, enter the Standby State, enter the Suspend + State, or take power saving steps in response to CPU Idle calls." + This driver will make CPU Idle calls when Linux is idle (unless this + feature is turned off -- see "Do CPU IDLE calls", below). This + should always save battery power, but more complicated APM features + will be dependent on your BIOS implementation. You may need to turn + this option off if your computer hangs at boot time when using APM + support, or if it beeps continuously instead of suspending. Turn + this off if you have a NEC UltraLite Versa 33/C or a Toshiba + T400CDT. This is off by default since most machines do fine without + this feature. + +config APM_CPU_IDLE + depends on CPU_IDLE + bool "Make CPU Idle calls when idle" + help + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. + On some machines, this can activate improved power savings, such as + a slowed CPU clock rate, when the machine is idle. These idle calls + are made after the idle loop has run for some length of time (e.g., + 333 mS). On some machines, this will cause a hang at boot time or + whenever the CPU becomes idle. (On machines with more than one CPU, + this option does nothing.) + +config APM_DISPLAY_BLANK + bool "Enable console blanking using APM" + help + Enable console blanking using the APM. Some laptops can use this to + turn off the LCD backlight when the screen blanker of the Linux + virtual console blanks the screen. Note that this is only used by + the virtual console screen blanker, and won't turn off the backlight + when using the X Window system. This also doesn't have anything to + do with your VESA-compliant power-saving monitor. Further, this + option doesn't work for all laptops -- it might not turn off your + backlight at all, or it might print a lot of errors to the console, + especially if you are using gpm. + +config APM_ALLOW_INTS + bool "Allow interrupts during APM BIOS calls" + help + Normally we disable external interrupts while we are making calls to + the APM BIOS as a measure to lessen the effects of a badly behaving + BIOS implementation. The BIOS should reenable interrupts if it + needs to. Unfortunately, some BIOSes do not -- especially those in + many of the newer IBM Thinkpads. If you experience hangs when you + suspend, try setting this to Y. Otherwise, say N. + +endif # APM + +source "drivers/cpufreq/Kconfig" + +source "drivers/cpuidle/Kconfig" + +source "drivers/idle/Kconfig" + +endmenu + +menu "Bus options (PCI etc.)" + +choice + prompt "PCI access mode" + depends on X86_32 && PCI + default PCI_GOANY + help + On PCI systems, the BIOS can be used to detect the PCI devices and + determine their configuration. However, some old PCI motherboards + have BIOS bugs and may crash if this is done. Also, some embedded + PCI-based systems don't have any BIOS at all. Linux can also try to + detect the PCI hardware directly without using the BIOS. + + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and falls back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOBIOS + bool "BIOS" + +config PCI_GOMMCONFIG + bool "MMConfig" + +config PCI_GODIRECT + bool "Direct" + +config PCI_GOOLPC + bool "OLPC XO-1" + depends on OLPC + +config PCI_GOANY + bool "Any" + +endchoice + +config PCI_BIOS + def_bool y + depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY) + +# x86-64 doesn't support PCI BIOS access from long mode so always go direct. +config PCI_DIRECT + def_bool y + depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG)) + +config PCI_MMCONFIG + bool "Support mmconfig PCI config space access" if X86_64 + default y + depends on PCI && (ACPI || JAILHOUSE_GUEST) + depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) + +config PCI_OLPC + def_bool y + depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) + +config PCI_XEN + def_bool y + depends on PCI && XEN + +config MMCONF_FAM10H + def_bool y + depends on X86_64 && PCI_MMCONFIG && ACPI + +config PCI_CNB20LE_QUIRK + bool "Read CNB20LE Host Bridge Windows" if EXPERT + depends on PCI + help + Read the PCI windows out of the CNB20LE host bridge. This allows + PCI hotplug to work on systems with the CNB20LE chipset which do + not have ACPI. + + There's no public spec for this chipset, and this functionality + is known to be incomplete. + + You should say N unless you know you need this. + +config ISA_BUS + bool "ISA bus support on modern systems" if EXPERT + help + Expose ISA bus device drivers and options available for selection and + configuration. Enable this option if your target machine has an ISA + bus. ISA is an older system, displaced by PCI and newer bus + architectures -- if your target machine is modern, it probably does + not have an ISA bus. + + If unsure, say N. + +# x86_64 have no ISA slots, but can have ISA-style DMA. +config ISA_DMA_API + bool "ISA-style DMA support" if (X86_64 && EXPERT) + default y + help + Enables ISA-style DMA support for devices requiring such controllers. + If unsure, say Y. + +if X86_32 + +config ISA + bool "ISA support" + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config SCx200 + tristate "NatSemi SCx200 support" + help + This provides basic support for National Semiconductor's + (now AMD's) Geode processors. The driver probes for the + PCI-IDs of several on-chip devices, so its a good dependency + for other scx200_* drivers. + + If compiled as a module, the driver is named scx200. + +config SCx200HR_TIMER + tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" + depends on SCx200 + default y + help + This driver provides a clocksource built upon the on-chip + 27MHz high-resolution timer. Its also a workaround for + NSC Geode SC-1100's buggy TSC, which loses time when the + processor goes idle (as is done by the scheduler). The + other workaround is idle=poll boot option. + +config OLPC + bool "One Laptop Per Child support" + depends on !X86_PAE + select GPIOLIB + select OF + select OF_PROMTREE + select IRQ_DOMAIN + select OLPC_EC + help + Add support for detecting the unique features of the OLPC + XO hardware. + +config OLPC_XO1_PM + bool "OLPC XO-1 Power Management" + depends on OLPC && MFD_CS5535=y && PM_SLEEP + help + Add support for poweroff and suspend of the OLPC XO-1 laptop. + +config OLPC_XO1_RTC + bool "OLPC XO-1 Real Time Clock" + depends on OLPC_XO1_PM && RTC_DRV_CMOS + help + Add support for the XO-1 real time clock, which can be used as a + programmable wakeup source. + +config OLPC_XO1_SCI + bool "OLPC XO-1 SCI extras" + depends on OLPC && OLPC_XO1_PM && GPIO_CS5535=y + depends on INPUT=y + select POWER_SUPPLY + help + Add support for SCI-based features of the OLPC XO-1 laptop: + - EC-driven system wakeups + - Power button + - Ebook switch + - Lid switch + - AC adapter status updates + - Battery status updates + +config OLPC_XO15_SCI + bool "OLPC XO-1.5 SCI extras" + depends on OLPC && ACPI + select POWER_SUPPLY + help + Add support for SCI-based features of the OLPC XO-1.5 laptop: + - EC-driven system wakeups + - AC adapter status updates + - Battery status updates + +config ALIX + bool "PCEngines ALIX System Support (LED setup)" + select GPIOLIB + help + This option enables system support for the PCEngines ALIX. + At present this just sets up LEDs for GPIO control on + ALIX2/3/6 boards. However, other system specific setup should + get added here. + + Note: You must still enable the drivers for GPIO and LED support + (GPIO_CS5535 & LEDS_GPIO) to actually use the LEDs + + Note: You have to set alix.force=1 for boards with Award BIOS. + +config NET5501 + bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)" + select GPIOLIB + help + This option enables system support for the Soekris Engineering net5501. + +config GEOS + bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" + select GPIOLIB + depends on DMI + help + This option enables system support for the Traverse Technologies GEOS. + +config TS5500 + bool "Technologic Systems TS-5500 platform support" + depends on MELAN + select CHECK_SIGNATURE + select NEW_LEDS + select LEDS_CLASS + help + This option enables system support for the Technologic Systems TS-5500. + +endif # X86_32 + +config AMD_NB + def_bool y + depends on CPU_SUP_AMD && PCI + +endmenu + +menu "Binary Emulations" + +config IA32_EMULATION + bool "IA32 Emulation" + depends on X86_64 + select ARCH_WANT_OLD_COMPAT_IPC + select BINFMT_ELF + select COMPAT_OLD_SIGACTION + help + Include code to run legacy 32-bit programs under a + 64-bit kernel. You should likely turn this on, unless you're + 100% sure that you don't have any 32-bit programs left. + +config X86_X32_ABI + bool "x32 ABI for 64-bit mode" + depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) + help + Include code to run binaries for the x32 native 32-bit ABI + for 64-bit processors. An x32 process gets access to the + full 64-bit register file and wide data path while leaving + pointers at 32 bits for smaller memory footprint. + +config COMPAT_32 + def_bool y + depends on IA32_EMULATION || X86_32 + select HAVE_UID16 + select OLD_SIGSUSPEND3 + +config COMPAT + def_bool y + depends on IA32_EMULATION || X86_X32_ABI + +config COMPAT_FOR_U64_ALIGNMENT + def_bool y + depends on COMPAT + +endmenu + +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + +source "arch/x86/kvm/Kconfig" + +source "arch/x86/Kconfig.assembler" diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler new file mode 100644 index 000000000..26b8c08e2 --- /dev/null +++ b/arch/x86/Kconfig.assembler @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. + +config AS_AVX512 + def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5) + help + Supported by binutils >= 2.25 and LLVM integrated assembler + +config AS_SHA1_NI + def_bool $(as-instr,sha1msg1 %xmm0$(comma)%xmm1) + help + Supported by binutils >= 2.24 and LLVM integrated assembler + +config AS_SHA256_NI + def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) + help + Supported by binutils >= 2.24 and LLVM integrated assembler +config AS_TPAUSE + def_bool $(as-instr,tpause %ecx) + help + Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu new file mode 100644 index 000000000..542377cd4 --- /dev/null +++ b/arch/x86/Kconfig.cpu @@ -0,0 +1,519 @@ +# SPDX-License-Identifier: GPL-2.0 +# Put here option for CPU selection and depending optimization +choice + prompt "Processor family" + default M686 if X86_32 + default GENERIC_CPU if X86_64 + help + This is the processor type of your CPU. This information is + used for optimizing purposes. In order to compile a kernel + that can run on all supported x86 CPU types (albeit not + optimally fast), you can specify "486" here. + + Note that the 386 is no longer supported, this includes + AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2, + UMC 486SX-S and the NexGen Nx586. + + The kernel will not necessarily run on earlier architectures than + the one you have chosen, e.g. a Pentium optimized kernel will run on + a PPro, but not necessarily on a i486. + + Here are the settings recommended for greatest speed: + - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + - "586" for generic Pentium CPUs lacking the TSC + (time stamp counter) register. + - "Pentium-Classic" for the Intel Pentium. + - "Pentium-MMX" for the Intel Pentium MMX. + - "Pentium-Pro" for the Intel Pentium Pro. + - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. + - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs. + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. + - "Winchip-2" for IDT Winchips with 3dNow! capabilities. + - "AMD Elan" for the 32-bit AMD Elan embedded CPU. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" For AMD Geode GX and LX processors. + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). + - "VIA C7" for VIA C7. + - "Intel P4" for the Pentium 4/Netburst microarchitecture. + - "Core 2/newer Xeon" for all core2 and newer Intel CPUs. + - "Intel Atom" for the Atom-microarchitecture CPUs. + - "Generic-x86-64" for a kernel which runs on any x86-64 CPU. + + See each option's help text for additional details. If you don't know + what to do, choose "486". + +config M486SX + bool "486SX" + depends on X86_32 + help + Select this for an 486-class CPU without an FPU such as + AMD/Cyrix/IBM/Intel SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5S. + +config M486 + bool "486DX" + depends on X86_32 + help + Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel + 486DX/DX2/DX4 and UMC U5D. + +config M586 + bool "586/K5/5x86/6x86/6x86MX" + depends on X86_32 + help + Select this for an 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. + +config M586TSC + bool "Pentium-Classic" + depends on X86_32 + help + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + +config M586MMX + bool "Pentium-MMX" + depends on X86_32 + help + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. + +config M686 + bool "Pentium-Pro" + depends on X86_32 + help + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. + +config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + depends on X86_32 + help + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags + tailored for the chip, and applies any applicable Pentium Pro + optimizations. + +config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + depends on X86_32 + help + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. This option enables use of some + extended prefetch instructions in addition to the Pentium II + extensions. + +config MPENTIUMM + bool "Pentium M" + depends on X86_32 + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 + help + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, Pentium D, P4-based Celeron and Xeon, and + Pentium-4 M (not Pentium M) chips. This option enables compile + flags optimized for the chip, uses the correct cache line size, and + applies any applicable optimizations. + + CPUIDs: F[0-6][1-A] (in /proc/cpuinfo show = cpu family : 15 ) + + Select this for: + Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename: + -Willamette + -Northwood + -Mobile Pentium 4 + -Mobile Pentium 4 M + -Extreme Edition (Gallatin) + -Prescott + -Prescott 2M + -Cedar Mill + -Presler + -Smithfiled + Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename: + -Foster + -Prestonia + -Gallatin + -Nocona + -Irwindale + -Cranford + -Potomac + -Paxville + -Dempsey + + +config MK6 + bool "K6/K6-II/K6-III" + depends on X86_32 + help + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK7 + bool "Athlon/Duron/K7" + depends on X86_32 + help + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK8 + bool "Opteron/Athlon64/Hammer/K8" + help + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MCRUSOE + bool "Crusoe" + depends on X86_32 + help + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). + +config MEFFICEON + bool "Efficeon" + depends on X86_32 + help + Select this for a Transmeta Efficeon processor. + +config MWINCHIPC6 + bool "Winchip-C6" + depends on X86_32 + help + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP3D + bool "Winchip-2/Winchip-2A/Winchip-3" + depends on X86_32 + help + Select this for an IDT Winchip-2, 2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. Also enable out of order memory + stores for this CPU, which can increase performance of some + operations. + +config MELAN + bool "AMD Elan" + depends on X86_32 + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + +config MGEODEGX1 + bool "GeodeGX1" + depends on X86_32 + help + Select this for a Geode GX1 (Cyrix MediaGX) chip. + +config MGEODE_LX + bool "Geode GX/LX" + depends on X86_32 + help + Select this for AMD Geode GX and LX processors. + +config MCYRIXIII + bool "CyrixIII/VIA-C3" + depends on X86_32 + help + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when + generating 686 code. + Note that Nehemiah (Model 9) and above will not boot with this + kernel due to them lacking the 3DNow! instructions used in earlier + incarnations of the CPU. + +config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + depends on X86_32 + help + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. + +config MVIAC7 + bool "VIA C7" + depends on X86_32 + help + Select this for a VIA C7. Selecting this uses the correct cache + shift and tells gcc to treat the CPU as a 686. + +config MPSC + bool "Intel P4 / older Netburst based Xeon" + depends on X86_64 + help + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey + Xeon CPUs with Intel 64bit which is compatible with x86-64. + Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the + Netburst core and shouldn't use this option. You can distinguish them + using the cpu family field + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. + +config MCORE2 + bool "Core 2/newer Xeon" + help + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) + +config MATOM + bool "Intel Atom" + help + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. + +config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + +endchoice + +config X86_GENERIC + bool "Generic x86 support" + depends on X86_32 + help + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel + perform better on x86 CPUs other than that selected. + + This is really intended for distributors who need more + generic optimizations. + +# +# Define implied options from the CPU selection here +config X86_INTERNODE_CACHE_SHIFT + int + default "12" if X86_VSMP + default X86_L1_CACHE_SHIFT + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || MPSC + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "4" if MELAN || M486SX || M486 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + +config X86_F00F_BUG + def_bool y + depends on M586MMX || M586TSC || M586 || M486SX || M486 + +config X86_INVD_BUG + def_bool y + depends on M486SX || M486 + +config X86_ALIGNMENT_16 + def_bool y + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 + +config X86_INTEL_USERCOPY + def_bool y + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + +config X86_USE_PPRO_CHECKSUM + def_bool y + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + +# +# P6_NOPs are a relatively minor optimization that require a family >= +# 6 processor, except that it is broken on certain VIA chips. +# Furthermore, AMD chips prefer a totally different sequence of NOPs +# (which work on all CPUs). In addition, it looks like Virtual PC +# does not understand them. +# +# As a result, disallow these if we're not compiling for X86_64 (these +# NOPs do work on all x86-64 capable chips); the list of processors in +# the right-hand clause are the cores that benefit from this optimization. +# +config X86_P6_NOP + def_bool y + depends on X86_64 + depends on (MCORE2 || MPENTIUM4 || MPSC) + +config X86_TSC + def_bool y + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 + +# this should be set for all -march=.. options where the compiler +# generates cmov. +config X86_CMOV + def_bool y + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + +config X86_MINIMUM_CPU_FAMILY + int + default "64" if X86_64 + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "5" if X86_32 && X86_CMPXCHG64 + default "4" + +config X86_DEBUGCTLMSR + def_bool y + depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML + +config IA32_FEAT_CTL + def_bool y + depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN + +config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + +menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + help + This lets you choose what x86 vendor support code your kernel + will include. + +config CPU_SUP_INTEL + default y + bool "Support Intel processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Intel processors + + You need this enabled if you want your kernel to run on an + Intel CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on an Intel + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on M486SX || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT) + help + This enables detection, tunings and quirks for Cyrix processors + + You need this enabled if you want your kernel to run on a + Cyrix CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Cyrix + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for AMD processors + + You need this enabled if you want your kernel to run on an + AMD CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on an AMD + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_HYGON + default y + bool "Support Hygon processors" if PROCESSOR_SELECT + select CPU_SUP_AMD + help + This enables detection, tunings and quirks for Hygon processors + + You need this enabled if you want your kernel to run on an + Hygon CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on an Hygon + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_CENTAUR + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Centaur processors + + You need this enabled if you want your kernel to run on a + Centaur CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Centaur + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables detection, tunings and quirks for Transmeta processors + + You need this enabled if you want your kernel to run on a + Transmeta CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Transmeta + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT + depends on M486SX || M486 || (EXPERT && !64BIT) + help + This enables detection, tunings and quirks for UMC processors + + You need this enabled if you want your kernel to run on a + UMC CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a UMC + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_ZHAOXIN + default y + bool "Support Zhaoxin processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Zhaoxin processors + + You need this enabled if you want your kernel to run on a + Zhaoxin CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_VORTEX_32 + default y + bool "Support Vortex processors" if PROCESSOR_SELECT + depends on X86_32 + help + This enables detection, tunings and quirks for Vortex processors + + You need this enabled if you want your kernel to run on a + Vortex CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. + + If unsure, say N. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug new file mode 100644 index 000000000..bdfe08f1a --- /dev/null +++ b/arch/x86/Kconfig.debug @@ -0,0 +1,277 @@ +# SPDX-License-Identifier: GPL-2.0 + +config EARLY_PRINTK_USB + bool + +config X86_VERBOSE_BOOTUP + bool "Enable verbose x86 bootup info messages" + default y + help + Enables the informational output from the decompression stage + (e.g. bzImage) of the boot. If you disable this you will still + see errors. Disable this if you want silent bootup. + +config EARLY_PRINTK + bool "Early printk" if EXPERT + default y + help + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally say N here, + unless you want to debug such a crash. + +config EARLY_PRINTK_DBGP + bool "Early printk via EHCI debug port" + depends on EARLY_PRINTK && PCI + select EARLY_PRINTK_USB + help + Write kernel log output directly into the EHCI debug port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally say N here, + unless you want to debug such a crash. You need usb debug device. + +config EARLY_PRINTK_USB_XDBC + bool "Early printk via the xHCI debug port" + depends on EARLY_PRINTK && PCI + select EARLY_PRINTK_USB + help + Write kernel log output directly into the xHCI debug port. + + One use for this feature is kernel debugging, for example when your + machine crashes very early before the regular console code is + initialized. Other uses include simpler, lockless logging instead of + a full-blown printk console driver + klogd. + + For normal production environments this is normally not recommended, + because it doesn't feed events into klogd/syslogd and doesn't try to + print anything on the screen. + + You should normally say N here, unless you want to debug early + crashes or need a very simple printk logging facility. + +config EFI_PGT_DUMP + bool "Dump the EFI pagetable" + depends on EFI + select PTDUMP_CORE + help + Enable this if you want to dump the EFI page table before + enabling virtual mode. This can be used to debug miscellaneous + issues with the mapping of the EFI runtime regions into that + table. + +config DEBUG_TLBFLUSH + bool "Set upper limit of TLB entries to flush one-by-one" + depends on DEBUG_KERNEL + help + X86-only for now. + + This option allows the user to tune the amount of TLB entries the + kernel flushes one-by-one instead of doing a full TLB flush. In + certain situations, the former is cheaper. This is controlled by the + tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it + to -1, the code flushes the whole TLB unconditionally. Otherwise, + for positive values of it, the kernel will use single TLB entry + invalidating instructions according to the following formula: + + flush_entries <= active_tlb_entries / 2^tlb_flushall_shift + + If in doubt, say "N". + +config IOMMU_DEBUG + bool "Enable IOMMU debugging" + depends on GART_IOMMU && DEBUG_KERNEL + depends on X86_64 + help + Force the IOMMU to on even when you have less than 4GB of + memory and add debugging code. On overflow always panic. And + allow to enable IOMMU leak tracing. Can be disabled at boot + time with iommu=noforce. This will also enable scatter gather + list merging. Currently not recommended for production + code. When you use it make sure you have a big enough + IOMMU/AGP aperture. Most of the options enabled by this can + be set more finegrained using the iommu= command line + options. See Documentation/x86/x86_64/boot-options.rst for more + details. + +config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on IOMMU_DEBUG && DMA_API_DEBUG + help + Add a simple leak tracer to the IOMMU code. This is useful when you + are debugging a buggy device driver that leaks IOMMU mappings. + +config HAVE_MMIOTRACE_SUPPORT + def_bool y + +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL && INSTRUCTION_DECODER + depends on !COMPILE_TEST + help + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + +choice + prompt "IO delay type" + default IO_DELAY_0X80 + +config IO_DELAY_0X80 + bool "port 0x80 based port-IO delay [recommended]" + help + This is the traditional Linux IO delay used for in/out_p. + It is the most tested hence safest selection here. + +config IO_DELAY_0XED + bool "port 0xed based port-IO delay" + help + Use port 0xed as the IO delay. This frees up port 0x80 which is + often used as a hardware-debug port. + +config IO_DELAY_UDELAY + bool "udelay based port-IO delay" + help + Use udelay(2) as the IO delay method. This provides the delay + while not having any side-effect on the IO port space. + +config IO_DELAY_NONE + bool "no port-IO delay" + help + No port-IO delay. Will break on old boxes that require port-IO + delay for certain operations. Should work on most new machines. + +endchoice + +config DEBUG_BOOT_PARAMS + bool "Debug boot parameters" + depends on DEBUG_KERNEL + depends on DEBUG_FS + help + This option will cause struct boot_params to be exported via debugfs. + +config CPA_DEBUG + bool "CPA self-test code" + depends on DEBUG_KERNEL + help + Do change_page_attr() self-tests every 30 seconds. + +config DEBUG_ENTRY + bool "Debug low-level entry code" + depends on DEBUG_KERNEL + help + This option enables sanity checks in x86's low-level entry code. + Some of these sanity checks may slow down kernel entries and + exits or otherwise impact performance. + + If unsure, say N. + +config DEBUG_NMI_SELFTEST + bool "NMI Selftest" + depends on DEBUG_KERNEL && X86_LOCAL_APIC + help + Enabling this option turns on a quick NMI selftest to verify + that the NMI behaves correctly. + + This might help diagnose strange hangs that rely on NMI to + function properly. + + If unsure, say N. + +config DEBUG_IMR_SELFTEST + bool "Isolated Memory Region self test" + depends on INTEL_IMR + help + This option enables automated sanity testing of the IMR code. + Some simple tests are run to verify IMR bounds checking, alignment + and overlapping. This option is really only useful if you are + debugging an IMR memory map or are modifying the IMR code and want to + test your changes. + + If unsure say N here. + +config X86_DEBUG_FPU + bool "Debug the x86 FPU code" + depends on DEBUG_KERNEL + default y + help + If this option is enabled then there will be extra sanity + checks and (boot time) debug printouts added to the kernel. + This debugging adds some small amount of runtime overhead + to the kernel. + + If unsure, say N. + +config PUNIT_ATOM_DEBUG + tristate "ATOM Punit debug driver" + depends on PCI + select DEBUG_FS + select IOSF_MBI + help + This is a debug driver, which gets the power states + of all Punit North Complex devices. The power states of + each device is exposed as part of the debugfs interface. + The current power state can be read from + /sys/kernel/debug/punit_atom/dev_power_state + +choice + prompt "Choose kernel unwinder" + default UNWINDER_ORC if X86_64 + default UNWINDER_FRAME_POINTER if X86_32 + help + This determines which method will be used for unwinding kernel stack + traces for panics, oopses, bugs, warnings, perf, /proc//stack, + livepatch, lockdep, and more. + +config UNWINDER_ORC + bool "ORC unwinder" + depends on X86_64 + select OBJTOOL + help + This option enables the ORC (Oops Rewind Capability) unwinder for + unwinding kernel stack traces. It uses a custom data format which is + a simplified version of the DWARF Call Frame Information standard. + + This unwinder is more accurate across interrupt entry frames than the + frame pointer unwinder. It also enables a 5-10% performance + improvement across the entire kernel compared to frame pointers. + + Enabling this option will increase the kernel's runtime memory usage + by roughly 2-4MB, depending on your kernel config. + +config UNWINDER_FRAME_POINTER + bool "Frame pointer unwinder" + select FRAME_POINTER + help + This option enables the frame pointer unwinder for unwinding kernel + stack traces. + + The unwinder itself is fast and it uses less RAM than the ORC + unwinder, but the kernel text size will grow by ~3% and the kernel's + overall performance will degrade by roughly 5-10%. + +config UNWINDER_GUESS + bool "Guess unwinder" + depends on EXPERT + depends on !STACKDEPOT + help + This option enables the "guess" unwinder for unwinding kernel stack + traces. It scans the stack and reports every kernel text address it + finds. Some of the addresses it reports may be incorrect. + + While this option often produces false positives, it can still be + useful in many cases. Unlike the other unwinders, it has no runtime + overhead. + +endchoice + +config FRAME_POINTER + depends on !UNWINDER_ORC && !UNWINDER_GUESS + bool diff --git a/arch/x86/Makefile b/arch/x86/Makefile new file mode 100644 index 000000000..3419ffa2a --- /dev/null +++ b/arch/x86/Makefile @@ -0,0 +1,325 @@ +# SPDX-License-Identifier: GPL-2.0 +# Unified Makefile for i386 and x86_64 + +# select defconfig based on actual architecture +ifeq ($(ARCH),x86) + ifeq ($(shell uname -m),x86_64) + KBUILD_DEFCONFIG := x86_64_defconfig + else + KBUILD_DEFCONFIG := i386_defconfig + endif +else + KBUILD_DEFCONFIG := $(ARCH)_defconfig +endif + +ifdef CONFIG_CC_IS_GCC +RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) +endif +ifdef CONFIG_CC_IS_CLANG +RETPOLINE_CFLAGS := -mretpoline-external-thunk +RETPOLINE_VDSO_CFLAGS := -mretpoline +endif +RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + +export RETHUNK_CFLAGS +export RETPOLINE_CFLAGS +export RETPOLINE_VDSO_CFLAGS + +# For gcc stack alignment is specified with -mpreferred-stack-boundary, +# clang has the option -mstack-alignment for that purpose. +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align4 := -mpreferred-stack-boundary=2 + cc_stack_align8 := -mpreferred-stack-boundary=3 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align4 := -mstack-alignment=4 + cc_stack_align8 := -mstack-alignment=8 +endif + +# How to compile the 16-bit code. Note we always compile for -march=i386; +# that way we can complain to the user if the CPU is insufficient. +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ + -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ + -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) + +REALMODE_CFLAGS += -ffreestanding +REALMODE_CFLAGS += -fno-stack-protector +REALMODE_CFLAGS += -Wno-address-of-packed-member +REALMODE_CFLAGS += $(cc_stack_align4) +REALMODE_CFLAGS += $(CLANG_FLAGS) +export REALMODE_CFLAGS + +# BITS is used as extension for files which are available in a 32 bit +# and a 64 bit version to simplify shared Makefiles. +# e.g.: obj-y += foo_$(BITS).o +export BITS + +# +# Prevent GCC from generating any FP code by mistake. +# +# This must happen before we try the -mpreferred-stack-boundary, see: +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 +# +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 + +ifeq ($(CONFIG_X86_KERNEL_IBT),y) +# +# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate +# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK +# for jump-tables, as such, disable jump-tables for now. +# +# (jump-tables are implicitly disabled by RETPOLINE) +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 +# +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) +else +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif + +ifeq ($(CONFIG_X86_32),y) + BITS := 32 + UTS_MACHINE := i386 + CHECKFLAGS += -D__i386__ + + KBUILD_AFLAGS += -m32 + KBUILD_CFLAGS += -m32 + + KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return + + # Never want PIC in a 32-bit kernel, prevent breakage with GCC built + # with nonstandard options + KBUILD_CFLAGS += -fno-pic + + # Align the stack to the register width instead of using the default + # alignment of 16 bytes. This reduces stack usage and the number of + # alignment instructions. + KBUILD_CFLAGS += $(cc_stack_align4) + + # CPU-specific tuning. Anything which can be shared with UML should go here. + include $(srctree)/arch/x86/Makefile_32.cpu + KBUILD_CFLAGS += $(cflags-y) + + # temporary until string.h is fixed + KBUILD_CFLAGS += -ffreestanding + + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) + KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard + else + KBUILD_CFLAGS += -mstack-protector-guard=global + endif + endif +else + BITS := 64 + UTS_MACHINE := x86_64 + CHECKFLAGS += -D__x86_64__ + + KBUILD_AFLAGS += -m64 + KBUILD_CFLAGS += -m64 + + # Align jump targets to 1 byte, not the default 16 bytes: + KBUILD_CFLAGS += $(call cc-option,-falign-jumps=1) + + # Pack loops tightly as well: + KBUILD_CFLAGS += $(call cc-option,-falign-loops=1) + + # Don't autogenerate traditional x87 instructions + KBUILD_CFLAGS += -mno-80387 + KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) + + # By default gcc and clang use a stack alignment of 16 bytes for x86. + # However the standard kernel entry on x86-64 leaves the stack on an + # 8-byte boundary. If the compiler isn't informed about the actual + # alignment it will generate extra alignment instructions for the + # default alignment which keep the stack *mis*aligned. + # Furthermore an alignment to the register width reduces stack usage + # and the number of alignment instructions. + KBUILD_CFLAGS += $(cc_stack_align8) + + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) + cflags-$(CONFIG_MK8) += -march=k8 + cflags-$(CONFIG_MPSC) += -march=nocona + cflags-$(CONFIG_MCORE2) += -march=core2 + cflags-$(CONFIG_MATOM) += -march=atom + cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic + KBUILD_CFLAGS += $(cflags-y) + + rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 + rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona + rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 + rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic + KBUILD_RUSTFLAGS += $(rustflags-y) + + KBUILD_CFLAGS += -mno-red-zone + KBUILD_CFLAGS += -mcmodel=kernel + KBUILD_RUSTFLAGS += -Cno-redzone=y + KBUILD_RUSTFLAGS += -Ccode-model=kernel +endif + +# +# If the function graph tracer is used with mcount instead of fentry, +# '-maccumulate-outgoing-args' is needed to prevent a GCC bug +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) +# +ifdef CONFIG_FUNCTION_GRAPH_TRACER + ifndef CONFIG_HAVE_FENTRY + ACCUMULATE_OUTGOING_ARGS := 1 + endif +endif + +ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) + # This compiler flag is not supported by Clang: + KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) +endif + +# Workaround for a gcc prelease that unfortunately was shipped in a suse release +KBUILD_CFLAGS += -Wno-sign-compare +# +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) + # Additionally, avoid generating expensive indirect jumps which + # are subject to retpolines for small number of switch cases. + # clang turns off jump table generation by default when under + # retpoline builds, however, gcc does not for x86. This has + # only been fixed starting from gcc stable version 8.4.0 and + # onwards, but not for older ones. See gcc bug #86952. + ifndef CONFIG_CC_IS_CLANG + KBUILD_CFLAGS += -fno-jump-tables + endif +endif + +ifdef CONFIG_SLS + KBUILD_CFLAGS += -mharden-sls=all +endif + +KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + +ifdef CONFIG_LTO_CLANG +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif +endif + +ifdef CONFIG_X86_NEED_RELOCS +LDFLAGS_vmlinux := --emit-relocs --discard-none +else +LDFLAGS_vmlinux := +endif + +# +# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS_vmlinux += -z max-page-size=0x200000 +endif + + +archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + +### +# Syscall table generation + +archheaders: + $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all + +### +# Kernel objects + +libs-y += arch/x86/lib/ + +# drivers-y are linked after core-y +drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ +drivers-$(CONFIG_PCI) += arch/x86/pci/ + +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/x86/power/ + +drivers-$(CONFIG_FB) += arch/x86/video/ + +#### +# boot loader support. Several targets are kept for legacy purposes + +boot := arch/x86/boot + +BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage + +PHONY += bzImage $(BOOT_TARGETS) + +# Default kernel to build +all: bzImage + +# KBUILD_IMAGE specify target image being built +KBUILD_IMAGE := $(boot)/bzImage + +bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif + $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ + +$(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +PHONY += install +install: + $(call cmd,install) + +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@ + +archprepare: checkbin +checkbin: +ifdef CONFIG_RETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + @echo "You are building kernel with non-retpoline compiler." >&2 + @echo "Please update your compiler." >&2 + @false +endif +endif + +archclean: + $(Q)rm -rf $(objtree)/arch/i386 + $(Q)rm -rf $(objtree)/arch/x86_64 + +define archhelp + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' + echo ' install - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or install to ' + echo ' $$(INSTALL_PATH) and run lilo' + echo '' + echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' + echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' + echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' + echo ' hdimage - Create a BIOS/EFI hard disk image (arch/x86/boot/hdimage)' + echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)' + echo ' bzdisk/fdimage*/hdimage/isoimage also accept:' + echo ' FDARGS="..." arguments for the booted kernel' + echo ' FDINITRD=file initrd for the booted kernel' + echo '' + echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest' + echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest' + echo ' x86_debug.config - Enable tip tree debugging options for testing' + +endef diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um new file mode 100644 index 000000000..1aa64846e --- /dev/null +++ b/arch/x86/Makefile.um @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +core-y += arch/x86/crypto/ + +# +# Disable SSE and other FP/SIMD instructions to match normal x86 +# This is required to work around issues in older LLVM versions, but breaks +# GCC versions < 11. See: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 +# +ifeq ($(CONFIG_CC_IS_CLANG),y) +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +endif + +ifeq ($(CONFIG_X86_32),y) +START := 0x8048000 + +KBUILD_LDFLAGS += -m elf_i386 +ELF_ARCH := i386 +ELF_FORMAT := elf32-i386 +CHECKFLAGS += -D__i386__ + +KBUILD_CFLAGS += $(call cc-option,-m32) +KBUILD_AFLAGS += $(call cc-option,-m32) +LINK-y += $(call cc-option,-m32) + +LDS_EXTRA := -Ui386 +export LDS_EXTRA + +# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. +include arch/x86/Makefile_32.cpu + +# prevent gcc from keeping the stack 16 byte aligned. Taken from i386. +cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) + +# Prevent sprintf in nfsd from being converted to strcpy and resulting in +# an unresolved reference. +cflags-y += -ffreestanding + +KBUILD_CFLAGS += $(cflags-y) + +else + +START := 0x60000000 + +KBUILD_CFLAGS += -fno-builtin -m64 + +CHECKFLAGS += -m64 -D__x86_64__ +KBUILD_AFLAGS += -m64 +KBUILD_LDFLAGS += -m elf_x86_64 +KBUILD_CPPFLAGS += -m64 + +ELF_ARCH := i386:x86-64 +ELF_FORMAT := elf64-x86-64 + +# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. + +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64 +LINK-y += -m64 + +endif diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu new file mode 100644 index 000000000..94834c4b5 --- /dev/null +++ b/arch/x86/Makefile_32.cpu @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0 +# CPU tuning section - shared with UML. +# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. + +tune = $(call cc-option,-mtune=$(1),$(2)) + +ifdef CONFIG_CC_IS_CLANG +align := -falign-functions=0 $(call cc-option,-falign-jumps=0) $(call cc-option,-falign-loops=0) +else +align := -falign-functions=0 -falign-jumps=0 -falign-loops=0 +endif + +cflags-$(CONFIG_M486SX) += -march=i486 +cflags-$(CONFIG_M486) += -march=i486 +cflags-$(CONFIG_M586) += -march=i586 +cflags-$(CONFIG_M586TSC) += -march=i586 +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx +cflags-$(CONFIG_M686) += -march=i686 +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) +cflags-$(CONFIG_MK6) += -march=k6 +# Please note, that patches that add -march=athlon-xp and friends are pointless. +# They make zero difference whatsosever to performance at this time. +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) +cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) +cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) +cflags-$(CONFIG_MVIAC7) += -march=i686 +cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + +# AMD Elan support +cflags-$(CONFIG_MELAN) += -march=i486 + +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) +# add at the end to overwrite eventual tuning options from earlier +# cpu entries +cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) + +# Bug fix for binutils: this option is required in order to keep +# binutils from generating NOPL instructions against our will. +ifneq ($(CONFIG_X86_P6_NOP),y) +cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) +endif diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore new file mode 100644 index 000000000..1189be057 --- /dev/null +++ b/arch/x86/boot/.gitignore @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +bootsect +bzImage +cpustr.h +mkcpustr +voffset.h +zoffset.h +setup +setup.bin +setup.elf +fdimage +mtools.conf +image.iso +hdimage diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile new file mode 100644 index 000000000..9e38ffaad --- /dev/null +++ b/arch/x86/boot/Makefile @@ -0,0 +1,159 @@ +# +# arch/x86/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changed by many, many contributors over the years. +# + +# Sanitizer runtimes are unavailable and cannot be linked for early boot code. +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +KMSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y + +# Kernel does not boot with kcov instrumentation here. +# One of the problems observed was insertion of __sanitizer_cov_trace_pc() +# callback into middle of per-cpu data enabling code. Thus the callback observed +# inconsistent state and crashed. We are interested mostly in syscall coverage, +# so boot code is not interesting anyway. +KCOV_INSTRUMENT := n + +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. + +SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + +targets := vmlinux.bin setup.bin setup.elf bzImage +targets += fdimage fdimage144 fdimage288 image.iso hdimage +subdir- := compressed + +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o +setup-y += early_serial_console.o edd.o header.o main.o memory.o +setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o +setup-y += video-mode.o version.o +setup-$(CONFIG_X86_APM_BOOT) += apm.o + +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +setup-y += video-vga.o +setup-y += video-vesa.o +setup-y += video-bios.o + +targets += $(setup-y) +hostprogs := tools/build +hostprogs += mkcpustr + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ + -include include/generated/autoconf.h \ + -D__EXPORTED_HEADERS__ + +ifdef CONFIG_X86_FEATURE_NAMES +$(obj)/cpu.o: $(obj)/cpustr.h + +quiet_cmd_cpustr = CPUSTR $@ + cmd_cpustr = $(obj)/mkcpustr > $@ +$(obj)/cpustr.h: $(obj)/mkcpustr FORCE + $(call if_changed,cpustr) +endif +targets += cpustr.h + +# --------------------------------------------------------------------------- + +KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +GCOV_PROFILE := n +UBSAN_SANITIZE := n + +$(obj)/bzImage: asflags-y := $(SVGA_MODE) + +quiet_cmd_image = BUILD $@ +silent_redirect_image = >/dev/null +cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ + $(obj)/zoffset.h $@ $($(quiet)redirect_image) + +$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE + $(call if_changed,image) + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' + +OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S +$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + +SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) + +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' + +quiet_cmd_zoffset = ZOFFSET $@ + cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ + +targets += zoffset.h +$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE + $(call if_changed,zoffset) + + +AFLAGS_header.o += -I$(objtree)/$(obj) +$(obj)/header.o: $(obj)/zoffset.h + +LDFLAGS_setup.elf := -m elf_i386 -z noexecstack -T +$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_setup.bin := -O binary +$(obj)/setup.bin: $(obj)/setup.elf FORCE + $(call if_changed,objcopy) + +$(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +# Set this if you want to pass append arguments to the +# bzdisk/fdimage/hdimage/isoimage kernel +FDARGS = +# Set this if you want one or more initrds included in the image +FDINITRD = + +imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh + +$(obj)/mtools.conf: $(src)/mtools.conf.in + sed -e 's|@OBJ@|$(obj)|g' < $< > $@ + +targets += mtools.conf + +# genimage.sh requires bash, but it also has a bunch of other +# external dependencies. +quiet_cmd_genimage = GENIMAGE $3 +cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ + $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) + +PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage + +# This requires write access to /dev/fd0 +# All images require syslinux to be installed; hdimage also requires +# EDK2/OVMF if the kernel is compiled with the EFI stub. +bzdisk: $(imgdeps) + $(call cmd,genimage,bzdisk,/dev/fd0) + +fdimage fdimage144: $(imgdeps) + $(call cmd,genimage,fdimage144,$(obj)/fdimage) + @$(kecho) 'Kernel: $(obj)/fdimage is ready' + +fdimage288: $(imgdeps) + $(call cmd,genimage,fdimage288,$(obj)/fdimage) + @$(kecho) 'Kernel: $(obj)/fdimage is ready' + +hdimage: $(imgdeps) + $(call cmd,genimage,hdimage,$(obj)/hdimage) + @$(kecho) 'Kernel: $(obj)/hdimage is ready' + +isoimage: $(imgdeps) + $(call cmd,genimage,isoimage,$(obj)/image.iso) + @$(kecho) 'Kernel: $(obj)/image.iso is ready' diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c new file mode 100644 index 000000000..a2b6b4289 --- /dev/null +++ b/arch/x86/boot/a20.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Enable A20 gate (return -1 on failure) + */ + +#include "boot.h" + +#define MAX_8042_LOOPS 100000 +#define MAX_8042_FF 32 + +static int empty_8042(void) +{ + u8 status; + int loops = MAX_8042_LOOPS; + int ffs = MAX_8042_FF; + + while (loops--) { + io_delay(); + + status = inb(0x64); + if (status == 0xff) { + /* FF is a plausible, but very unlikely status */ + if (!--ffs) + return -1; /* Assume no KBC present */ + } + if (status & 1) { + /* Read and discard input data */ + io_delay(); + (void)inb(0x60); + } else if (!(status & 2)) { + /* Buffers empty, finished! */ + return 0; + } + } + + return -1; +} + +/* Returns nonzero if the A20 line is enabled. The memory address + used as a test is the int $0x80 vector, which should be safe. */ + +#define A20_TEST_ADDR (4*0x80) +#define A20_TEST_SHORT 32 +#define A20_TEST_LONG 2097152 /* 2^21 */ + +static int a20_test(int loops) +{ + int ok = 0; + int saved, ctr; + + set_fs(0x0000); + set_gs(0xffff); + + saved = ctr = rdfs32(A20_TEST_ADDR); + + while (loops--) { + wrfs32(++ctr, A20_TEST_ADDR); + io_delay(); /* Serialize and make delay constant */ + ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr; + if (ok) + break; + } + + wrfs32(saved, A20_TEST_ADDR); + return ok; +} + +/* Quick test to see if A20 is already enabled */ +static int a20_test_short(void) +{ + return a20_test(A20_TEST_SHORT); +} + +/* Longer test that actually waits for A20 to come on line; this + is useful when dealing with the KBC or other slow external circuitry. */ +static int a20_test_long(void) +{ + return a20_test(A20_TEST_LONG); +} + +static void enable_a20_bios(void) +{ + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0x2401; + intcall(0x15, &ireg, NULL); +} + +static void enable_a20_kbc(void) +{ + empty_8042(); + + outb(0xd1, 0x64); /* Command write */ + empty_8042(); + + outb(0xdf, 0x60); /* A20 on */ + empty_8042(); + + outb(0xff, 0x64); /* Null command, but UHCI wants it */ + empty_8042(); +} + +static void enable_a20_fast(void) +{ + u8 port_a; + + port_a = inb(0x92); /* Configuration port A */ + port_a |= 0x02; /* Enable A20 */ + port_a &= ~0x01; /* Do not reset machine */ + outb(port_a, 0x92); +} + +/* + * Actual routine to enable A20; return 0 on ok, -1 on failure + */ + +#define A20_ENABLE_LOOPS 255 /* Number of times to try */ + +int enable_a20(void) +{ + int loops = A20_ENABLE_LOOPS; + int kbc_err; + + while (loops--) { + /* First, check to see if A20 is already enabled + (legacy free, etc.) */ + if (a20_test_short()) + return 0; + + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ + enable_a20_bios(); + if (a20_test_short()) + return 0; + + /* Try enabling A20 through the keyboard controller */ + kbc_err = empty_8042(); + + if (a20_test_short()) + return 0; /* BIOS worked, but with delayed reaction */ + + if (!kbc_err) { + enable_a20_kbc(); + if (a20_test_long()) + return 0; + } + + /* Finally, try enabling the "fast A20 gate" */ + enable_a20_fast(); + if (a20_test_long()) + return 0; + } + + return -1; +} diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c new file mode 100644 index 000000000..bda15f967 --- /dev/null +++ b/arch/x86/boot/apm.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * Original APM BIOS checking by Stephen Rothwell, May 1994 + * (sfr@canb.auug.org.au) + * + * ----------------------------------------------------------------------- */ + +/* + * Get APM BIOS information + */ + +#include "boot.h" + +int query_apm_bios(void) +{ + struct biosregs ireg, oreg; + + /* APM BIOS installation check */ + initregs(&ireg); + ireg.ah = 0x53; + intcall(0x15, &ireg, &oreg); + + if (oreg.flags & X86_EFLAGS_CF) + return -1; /* No APM BIOS */ + + if (oreg.bx != 0x504d) /* "PM" signature */ + return -1; + + if (!(oreg.cx & 0x02)) /* 32 bits supported? */ + return -1; + + /* Disconnect first, just in case */ + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); + + /* 32-bit connect */ + ireg.al = 0x03; + intcall(0x15, &ireg, &oreg); + + boot_params.apm_bios_info.cseg = oreg.ax; + boot_params.apm_bios_info.offset = oreg.ebx; + boot_params.apm_bios_info.cseg_16 = oreg.cx; + boot_params.apm_bios_info.dseg = oreg.dx; + boot_params.apm_bios_info.cseg_len = oreg.si; + boot_params.apm_bios_info.cseg_16_len = oreg.hsi; + boot_params.apm_bios_info.dseg_len = oreg.di; + + if (oreg.flags & X86_EFLAGS_CF) + return -1; + + /* Redo the installation check as the 32-bit connect; + some BIOSes return different flags this way... */ + + ireg.al = 0x00; + intcall(0x15, &ireg, &oreg); + + if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) { + /* Failure with 32-bit connect, try to disconnect and ignore */ + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); + return -1; + } + + boot_params.apm_bios_info.version = oreg.ax; + boot_params.apm_bios_info.flags = oreg.cx; + return 0; +} + diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S new file mode 100644 index 000000000..aa9b96457 --- /dev/null +++ b/arch/x86/boot/bioscall.S @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* ----------------------------------------------------------------------- + * + * Copyright 2009-2014 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes + * touching registers they shouldn't be. + */ + + .code16 + .section ".inittext","ax" + .globl intcall + .type intcall, @function +intcall: + /* Self-modify the INT instruction. Ugly, but works. */ + cmpb %al, 3f + je 1f + movb %al, 3f + jmp 1f /* Synchronize pipeline */ +1: + /* Save state */ + pushfl + pushw %fs + pushw %gs + pushal + + /* Copy input state to stack frame */ + subw $44, %sp + movw %dx, %si + movw %sp, %di + movw $11, %cx + rep; movsl + + /* Pop full state from the stack */ + popal + popw %gs + popw %fs + popw %es + popw %ds + popfl + + /* Actual INT */ + .byte 0xcd /* INT opcode */ +3: .byte 0 + + /* Push full state to the stack */ + pushfl + pushw %ds + pushw %es + pushw %fs + pushw %gs + pushal + + /* Re-establish C environment invariants */ + cld + movzwl %sp, %esp + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + + /* Copy output state from stack frame */ + movw 68(%esp), %di /* Original %cx == 3rd argument */ + andw %di, %di + jz 4f + movw %sp, %si + movw $11, %cx + rep; movsl +4: addw $44, %sp + + /* Restore state and return */ + popal + popw %gs + popw %fs + popfl + retl + .size intcall, .-intcall diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h new file mode 100644 index 000000000..8518ae214 --- /dev/null +++ b/arch/x86/boot/bitops.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Very simple bitops for the boot code. + */ + +#ifndef BOOT_BITOPS_H +#define BOOT_BITOPS_H +#define _LINUX_BITOPS_H /* Inhibit inclusion of */ + +#include +#include + +static inline bool constant_test_bit(int nr, const void *addr) +{ + const u32 *p = addr; + return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; +} +static inline bool variable_test_bit(int nr, const void *addr) +{ + bool v; + const u32 *p = addr; + + asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr)); + return v; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +static inline void set_bit(int nr, void *addr) +{ + asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); +} + +#endif /* BOOT_BITOPS_H */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h new file mode 100644 index 000000000..148ba5c51 --- /dev/null +++ b/arch/x86/boot/boot.h @@ -0,0 +1,333 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Header file for the real-mode kernel code + */ + +#ifndef BOOT_BOOT_H +#define BOOT_BOOT_H + +#define STACK_SIZE 1024 /* Minimum number of bytes for stack */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include +#include "bitops.h" +#include "ctype.h" +#include "cpuflags.h" +#include "io.h" + +/* Useful macros */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +extern struct setup_header hdr; +extern struct boot_params boot_params; + +#define cpu_relax() asm volatile("rep; nop") + +static inline void io_delay(void) +{ + const u16 DELAY_PORT = 0x80; + outb(0, DELAY_PORT); +} + +/* These functions are used to reference data in other segments. */ + +static inline u16 ds(void) +{ + u16 seg; + asm("movw %%ds,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_fs(u16 seg) +{ + asm volatile("movw %0,%%fs" : : "rm" (seg)); +} +static inline u16 fs(void) +{ + u16 seg; + asm volatile("movw %%fs,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_gs(u16 seg) +{ + asm volatile("movw %0,%%gs" : : "rm" (seg)); +} +static inline u16 gs(void) +{ + u16 seg; + asm volatile("movw %%gs,%0" : "=rm" (seg)); + return seg; +} + +typedef unsigned int addr_t; + +static inline u8 rdfs8(addr_t addr) +{ + u8 *ptr = (u8 *)absolute_pointer(addr); + u8 v; + asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr)); + return v; +} +static inline u16 rdfs16(addr_t addr) +{ + u16 *ptr = (u16 *)absolute_pointer(addr); + u16 v; + asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr)); + return v; +} +static inline u32 rdfs32(addr_t addr) +{ + u32 *ptr = (u32 *)absolute_pointer(addr); + u32 v; + asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr)); + return v; +} + +static inline void wrfs8(u8 v, addr_t addr) +{ + u8 *ptr = (u8 *)absolute_pointer(addr); + asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v)); +} +static inline void wrfs16(u16 v, addr_t addr) +{ + u16 *ptr = (u16 *)absolute_pointer(addr); + asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v)); +} +static inline void wrfs32(u32 v, addr_t addr) +{ + u32 *ptr = (u32 *)absolute_pointer(addr); + asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v)); +} + +static inline u8 rdgs8(addr_t addr) +{ + u8 *ptr = (u8 *)absolute_pointer(addr); + u8 v; + asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr)); + return v; +} +static inline u16 rdgs16(addr_t addr) +{ + u16 *ptr = (u16 *)absolute_pointer(addr); + u16 v; + asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr)); + return v; +} +static inline u32 rdgs32(addr_t addr) +{ + u32 *ptr = (u32 *)absolute_pointer(addr); + u32 v; + asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr)); + return v; +} + +static inline void wrgs8(u8 v, addr_t addr) +{ + u8 *ptr = (u8 *)absolute_pointer(addr); + asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v)); +} +static inline void wrgs16(u16 v, addr_t addr) +{ + u16 *ptr = (u16 *)absolute_pointer(addr); + asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v)); +} +static inline void wrgs32(u32 v, addr_t addr) +{ + u32 *ptr = (u32 *)absolute_pointer(addr); + asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v)); +} + +/* Note: these only return true/false, not a signed return value! */ +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) +{ + bool diff; + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) +{ + bool diff; + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +/* Heap -- available for dynamic lists. */ +extern char _end[]; +extern char *HEAP; +extern char *heap_end; +#define RESET_HEAP() ((void *)( HEAP = _end )) +static inline char *__get_heap(size_t s, size_t a, size_t n) +{ + char *tmp; + + HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1)); + tmp = HEAP; + HEAP += s*n; + return tmp; +} +#define GET_HEAP(type, n) \ + ((type *)__get_heap(sizeof(type),__alignof__(type),(n))) + +static inline bool heap_free(size_t n) +{ + return (int)(heap_end-HEAP) >= (int)n; +} + +/* copy.S */ + +void copy_to_fs(addr_t dst, void *src, size_t len); +void *copy_from_fs(void *dst, addr_t src, size_t len); +void copy_to_gs(addr_t dst, void *src, size_t len); +void *copy_from_gs(void *dst, addr_t src, size_t len); + +/* a20.c */ +int enable_a20(void); + +/* apm.c */ +int query_apm_bios(void); + +/* bioscall.c */ +struct biosregs { + union { + struct { + u32 edi; + u32 esi; + u32 ebp; + u32 _esp; + u32 ebx; + u32 edx; + u32 ecx; + u32 eax; + u32 _fsgs; + u32 _dses; + u32 eflags; + }; + struct { + u16 di, hdi; + u16 si, hsi; + u16 bp, hbp; + u16 _sp, _hsp; + u16 bx, hbx; + u16 dx, hdx; + u16 cx, hcx; + u16 ax, hax; + u16 gs, fs; + u16 es, ds; + u16 flags, hflags; + }; + struct { + u8 dil, dih, edi2, edi3; + u8 sil, sih, esi2, esi3; + u8 bpl, bph, ebp2, ebp3; + u8 _spl, _sph, _esp2, _esp3; + u8 bl, bh, ebx2, ebx3; + u8 dl, dh, edx2, edx3; + u8 cl, ch, ecx2, ecx3; + u8 al, ah, eax2, eax3; + }; + }; +}; +void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); + +/* cmdline.c */ +int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize); +int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option); +static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) +{ + unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + + if (cmd_line_ptr >= 0x100000) + return -1; /* inaccessible */ + + return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize); +} + +static inline int cmdline_find_option_bool(const char *option) +{ + unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + + if (cmd_line_ptr >= 0x100000) + return -1; /* inaccessible */ + + return __cmdline_find_option_bool(cmd_line_ptr, option); +} + +/* cpu.c, cpucheck.c */ +int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int check_knl_erratum(void); +int validate_cpu(void); + +/* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); + +/* edd.c */ +void query_edd(void); + +/* header.S */ +void __attribute__((noreturn)) die(void); + +/* memory.c */ +void detect_memory(void); + +/* pm.c */ +void __attribute__((noreturn)) go_to_protected_mode(void); + +/* pmjump.S */ +void __attribute__((noreturn)) + protected_mode_jump(u32 entrypoint, u32 bootparams); + +/* printf.c */ +int sprintf(char *buf, const char *fmt, ...); +int vsprintf(char *buf, const char *fmt, va_list args); +int printf(const char *fmt, ...); + +/* regs.c */ +void initregs(struct biosregs *regs); + +/* string.c */ +int strcmp(const char *str1, const char *str2); +int strncmp(const char *cs, const char *ct, size_t count); +size_t strnlen(const char *s, size_t maxlen); +unsigned int atou(const char *s); +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); +size_t strlen(const char *s); +char *strchr(const char *s, int c); + +/* tty.c */ +void puts(const char *); +void putchar(int); +int getchar(void); +void kbd_flush(void); +int getchar_timeout(void); + +/* video.c */ +void set_video(void); + +/* video-mode.c */ +int set_mode(u16 mode); +int mode_defined(u16 mode); +void probe_cards(int unsafe); + +/* video-vesa.c */ +void vesa_store_edid(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c new file mode 100644 index 000000000..21d56ae83 --- /dev/null +++ b/arch/x86/boot/cmdline.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Simple command-line parser for early boot. + */ + +#include "boot.h" + +static inline int myisspace(u8 c) +{ + return c <= ' '; /* Close enough approximation */ +} + +/* + * Find a non-boolean option, that is, "option=argument". In accordance + * with standard Linux practice, if this option is repeated, this returns + * the last instance on the command line. + * + * Returns the length of the argument (regardless of if it was + * truncated to fit in the buffer), or -1 on not found. + */ +int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize) +{ + addr_t cptr; + char c; + int len = -1; + const char *opptr = NULL; + char *bufptr = buffer; + enum { + st_wordstart, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + st_bufcpy /* Copying this to buffer */ + } state = st_wordstart; + + if (!cmdline_ptr) + return -1; /* No command line */ + + cptr = cmdline_ptr & 0xf; + set_fs(cmdline_ptr >> 4); + + while (cptr < 0x10000 && (c = rdfs8(cptr++))) { + switch (state) { + case st_wordstart: + if (myisspace(c)) + break; + + /* else */ + state = st_wordcmp; + opptr = option; + fallthrough; + + case st_wordcmp: + if (c == '=' && !*opptr) { + len = 0; + bufptr = buffer; + state = st_bufcpy; + } else if (myisspace(c)) { + state = st_wordstart; + } else if (c != *opptr++) { + state = st_wordskip; + } + break; + + case st_wordskip: + if (myisspace(c)) + state = st_wordstart; + break; + + case st_bufcpy: + if (myisspace(c)) { + state = st_wordstart; + } else { + if (len < bufsize-1) + *bufptr++ = c; + len++; + } + break; + } + } + + if (bufsize) + *bufptr = '\0'; + + return len; +} + +/* + * Find a boolean option (like quiet,noapic,nosmp....) + * + * Returns the position of that option (starts counting with 1) + * or 0 on not found + */ +int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option) +{ + addr_t cptr; + char c; + int pos = 0, wstart = 0; + const char *opptr = NULL; + enum { + st_wordstart, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + } state = st_wordstart; + + if (!cmdline_ptr) + return -1; /* No command line */ + + cptr = cmdline_ptr & 0xf; + set_fs(cmdline_ptr >> 4); + + while (cptr < 0x10000) { + c = rdfs8(cptr++); + pos++; + + switch (state) { + case st_wordstart: + if (!c) + return 0; + else if (myisspace(c)) + break; + + state = st_wordcmp; + opptr = option; + wstart = pos; + fallthrough; + + case st_wordcmp: + if (!*opptr) + if (!c || myisspace(c)) + return wstart; + else + state = st_wordskip; + else if (!c) + return 0; + else if (c != *opptr++) + state = st_wordskip; + break; + + case st_wordskip: + if (!c) + return 0; + else if (myisspace(c)) + state = st_wordstart; + break; + } + } + + return 0; /* Buffer overrun */ +} diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore new file mode 100644 index 000000000..25805199a --- /dev/null +++ b/arch/x86/boot/compressed/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +relocs +vmlinux.bin.all +vmlinux.relocs +vmlinux.lds +mkpiggy +piggy.S diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile new file mode 100644 index 000000000..15b7b403a --- /dev/null +++ b/arch/x86/boot/compressed/Makefile @@ -0,0 +1,161 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# linux/arch/x86/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# +# vmlinuz is: +# decompression code (*.o) +# asm globals (piggy.S), including: +# vmlinux.bin.(gz|bz2|lzma|...) +# +# vmlinux.bin is: +# vmlinux stripped of debugging and comments +# vmlinux.bin.all is: +# vmlinux.bin + vmlinux.relocs +# vmlinux.bin.(gz|bz2|lzma|...) is: +# (see scripts/Makefile.lib size_append) +# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all + +# Sanitizer runtimes are unavailable and cannot be linked for early boot code. +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +KMSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y + +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst + +# CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in +# case of cross compiling, as it has the '--target=' flag, which is needed to +# avoid errors with '-march=i386', and future flags may depend on the target to +# be valid. +KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -fno-strict-aliasing -fPIE +KBUILD_CFLAGS += -Wundef +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 +cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += -mno-mmx -mno-sse +KBUILD_CFLAGS += -ffreestanding -fshort-wchar +KBUILD_CFLAGS += -fno-stack-protector +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += -D__DISABLE_EXPORTS +# Disable relocation relaxation in case the link is not PIE. +KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no) +KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h + +# sev.c indirectly inludes inat-table.h which is generated during +# compilation and stored in $(objtree). Add the directory to the includes so +# that the compiler finds it even with out-of-tree builds (make O=/some/path). +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ + +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +GCOV_PROFILE := n +UBSAN_SANITIZE :=n + +KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) +KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) +# Compressed kernel should be built as PIE since it may be loaded at any +# address by the bootloader. +LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif +LDFLAGS_vmlinux += -z noexecstack +ifeq ($(CONFIG_LD_IS_BFD),y) +LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) +endif +LDFLAGS_vmlinux += -T + +hostprogs := mkpiggy +HOST_EXTRACFLAGS += -I$(srctree)/tools/include + +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' + +quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ + +targets += ../voffset.h + +$(obj)/../voffset.h: vmlinux FORCE + $(call if_changed,voffset) + +$(obj)/misc.o: $(obj)/../voffset.h + +vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \ + $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ + $(obj)/piggy.o $(obj)/cpuflags.o + +vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o +vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o +ifdef CONFIG_X86_64 + vmlinux-objs-y += $(obj)/ident_map_64.o + vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o + vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-y += $(obj)/pgtable_64.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o +endif + +vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o +vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o + +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o +efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a + +$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs + +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs + +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE + $(call if_changed,bzip2_with_size) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzma_with_size) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,xzkern_with_size) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzo_with_size) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lz4_with_size) +$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE + $(call if_changed,zstd22_with_size) + +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 +suffix-$(CONFIG_KERNEL_ZSTD) := zst + +quiet_cmd_mkpiggy = MKPIGGY $@ + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ + +targets += piggy.S +$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE + $(call if_changed,mkpiggy) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c new file mode 100644 index 000000000..9caf89063 --- /dev/null +++ b/arch/x86/boot/compressed/acpi.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BOOT_CTYPE_H +#include "misc.h" +#include "error.h" +#include "../string.h" +#include "efi.h" + +#include + +/* + * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0' + * for termination. + */ +#define MAX_ACPI_ARG_LENGTH 10 + +/* + * Immovable memory regions representation. Max amount of memory regions is + * MAX_NUMNODES*2. + */ +struct mem_vector immovable_mem[MAX_NUMNODES*2]; + +static acpi_physical_address +__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) +{ +#ifdef CONFIG_EFI + unsigned long rsdp_addr; + int ret; + + /* + * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to + * ACPI_TABLE_GUID because it has more features. + */ + rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + ACPI_20_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; + + /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ + rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + ACPI_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; + + debug_putstr("Error getting RSDP address.\n"); +#endif + return 0; +} + +static acpi_physical_address efi_get_rsdp_addr(void) +{ +#ifdef CONFIG_EFI + unsigned long cfg_tbl_pa = 0; + unsigned int cfg_tbl_len; + unsigned long systab_pa; + unsigned int nr_tables; + enum efi_type et; + int ret; + + et = efi_get_type(boot_params); + if (et == EFI_TYPE_NONE) + return 0; + + systab_pa = efi_get_system_table(boot_params); + if (!systab_pa) + error("EFI support advertised, but unable to locate system table."); + + ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + if (ret || !cfg_tbl_pa) + error("EFI config table not found."); + + return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len); +#else + return 0; +#endif +} + +static u8 compute_checksum(u8 *buffer, u32 length) +{ + u8 *end = buffer + length; + u8 sum = 0; + + while (buffer < end) + sum += *(buffer++); + + return sum; +} + +/* Search a block of memory for the RSDP signature. */ +static u8 *scan_mem_for_rsdp(u8 *start, u32 length) +{ + struct acpi_table_rsdp *rsdp; + u8 *address, *end; + + end = start + length; + + /* Search from given start address for the requested length */ + for (address = start; address < end; address += ACPI_RSDP_SCAN_STEP) { + /* + * Both RSDP signature and checksum must be correct. + * Note: Sometimes there exists more than one RSDP in memory; + * the valid RSDP has a valid checksum, all others have an + * invalid checksum. + */ + rsdp = (struct acpi_table_rsdp *)address; + + /* BAD Signature */ + if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature)) + continue; + + /* Check the standard checksum */ + if (compute_checksum((u8 *)rsdp, ACPI_RSDP_CHECKSUM_LENGTH)) + continue; + + /* Check extended checksum if table version >= 2 */ + if ((rsdp->revision >= 2) && + (compute_checksum((u8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH))) + continue; + + /* Signature and checksum valid, we have found a real RSDP */ + return address; + } + return NULL; +} + +/* Search RSDP address in EBDA. */ +static acpi_physical_address bios_get_rsdp_addr(void) +{ + unsigned long address; + u8 *rsdp; + + /* Get the location of the Extended BIOS Data Area (EBDA) */ + address = *(u16 *)ACPI_EBDA_PTR_LOCATION; + address <<= 4; + + /* + * Search EBDA paragraphs (EBDA is required to be a minimum of + * 1K length) + */ + if (address > 0x400) { + rsdp = scan_mem_for_rsdp((u8 *)address, ACPI_EBDA_WINDOW_SIZE); + if (rsdp) + return (acpi_physical_address)(unsigned long)rsdp; + } + + /* Search upper memory: 16-byte boundaries in E0000h-FFFFFh */ + rsdp = scan_mem_for_rsdp((u8 *) ACPI_HI_RSDP_WINDOW_BASE, + ACPI_HI_RSDP_WINDOW_SIZE); + if (rsdp) + return (acpi_physical_address)(unsigned long)rsdp; + + return 0; +} + +/* Return RSDP address on success, otherwise 0. */ +acpi_physical_address get_rsdp_addr(void) +{ + acpi_physical_address pa; + + pa = boot_params->acpi_rsdp_addr; + + if (!pa) + pa = efi_get_rsdp_addr(); + + if (!pa) + pa = bios_get_rsdp_addr(); + + return pa; +} + +#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) +/* + * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex + * digits, and '\0' for termination. + */ +#define MAX_ADDR_LEN 19 + +static unsigned long get_cmdline_acpi_rsdp(void) +{ + unsigned long addr = 0; + +#ifdef CONFIG_KEXEC + char val[MAX_ADDR_LEN] = { }; + int ret; + + ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); + if (ret < 0) + return 0; + + if (boot_kstrtoul(val, 16, &addr)) + return 0; +#endif + return addr; +} + +/* Compute SRAT address from RSDP. */ +static unsigned long get_acpi_srat_table(void) +{ + unsigned long root_table, acpi_table; + struct acpi_table_header *header; + struct acpi_table_rsdp *rsdp; + u32 num_entries, size, len; + char arg[10]; + u8 *entry; + + /* + * Check whether we were given an RSDP on the command line. We don't + * stash this in boot params because the kernel itself may have + * different ideas about whether to trust a command-line parameter. + */ + rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); + if (!rsdp) + rsdp = (struct acpi_table_rsdp *)(long) + boot_params->acpi_rsdp_addr; + + if (!rsdp) + return 0; + + /* Get ACPI root table from RSDP.*/ + if (!(cmdline_find_option("acpi", arg, sizeof(arg)) == 4 && + !strncmp(arg, "rsdt", 4)) && + rsdp->xsdt_physical_address && + rsdp->revision > 1) { + root_table = rsdp->xsdt_physical_address; + size = ACPI_XSDT_ENTRY_SIZE; + } else { + root_table = rsdp->rsdt_physical_address; + size = ACPI_RSDT_ENTRY_SIZE; + } + + if (!root_table) + return 0; + + header = (struct acpi_table_header *)root_table; + len = header->length; + if (len < sizeof(struct acpi_table_header) + size) + return 0; + + num_entries = (len - sizeof(struct acpi_table_header)) / size; + entry = (u8 *)(root_table + sizeof(struct acpi_table_header)); + + while (num_entries--) { + if (size == ACPI_RSDT_ENTRY_SIZE) + acpi_table = *(u32 *)entry; + else + acpi_table = *(u64 *)entry; + + if (acpi_table) { + header = (struct acpi_table_header *)acpi_table; + + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_SRAT)) + return acpi_table; + } + entry += size; + } + return 0; +} + +/** + * count_immovable_mem_regions - Parse SRAT and cache the immovable + * memory regions into the immovable_mem array. + * + * Return the number of immovable memory regions on success, 0 on failure: + * + * - Too many immovable memory regions + * - ACPI off or no SRAT found + * - No immovable memory region found. + */ +int count_immovable_mem_regions(void) +{ + unsigned long table_addr, table_end, table; + struct acpi_subtable_header *sub_table; + struct acpi_table_header *table_header; + char arg[MAX_ACPI_ARG_LENGTH]; + int num = 0; + + if (cmdline_find_option("acpi", arg, sizeof(arg)) == 3 && + !strncmp(arg, "off", 3)) + return 0; + + table_addr = get_acpi_srat_table(); + if (!table_addr) + return 0; + + table_header = (struct acpi_table_header *)table_addr; + table_end = table_addr + table_header->length; + table = table_addr + sizeof(struct acpi_table_srat); + + while (table + sizeof(struct acpi_subtable_header) < table_end) { + + sub_table = (struct acpi_subtable_header *)table; + if (!sub_table->length) { + debug_putstr("Invalid zero length SRAT subtable.\n"); + return 0; + } + + if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { + struct acpi_srat_mem_affinity *ma; + + ma = (struct acpi_srat_mem_affinity *)sub_table; + if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) { + immovable_mem[num].start = ma->base_address; + immovable_mem[num].size = ma->length; + num++; + } + + if (num >= MAX_NUMNODES*2) { + debug_putstr("Too many immovable memory regions, aborting.\n"); + return 0; + } + } + table += sub_table->length; + } + return num; +} +#endif /* CONFIG_RANDOMIZE_BASE && CONFIG_MEMORY_HOTREMOVE */ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c new file mode 100644 index 000000000..f1add5d85 --- /dev/null +++ b/arch/x86/boot/compressed/cmdline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "misc.h" + +static unsigned long fs; +static inline void set_fs(unsigned long seg) +{ + fs = seg << 4; /* shift it back */ +} +typedef unsigned long addr_t; +static inline char rdfs8(addr_t addr) +{ + return *((char *)(fs + addr)); +} +#include "../cmdline.c" +unsigned long get_cmd_line_ptr(void) +{ + unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; + + cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; + + return cmd_line_ptr; +} +int cmdline_find_option(const char *option, char *buffer, int bufsize) +{ + return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize); +} +int cmdline_find_option_bool(const char *option) +{ + return __cmdline_find_option_bool(get_cmd_line_ptr(), option); +} diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 000000000..0cc132389 --- /dev/null +++ b/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_cpuflags(); + + return test_bit(flag, cpu.flags); +} diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c new file mode 100644 index 000000000..70a8d1706 --- /dev/null +++ b/arch/x86/boot/compressed/early_serial_console.c @@ -0,0 +1,6 @@ +#include "misc.h" + +/* This might be accessed before .bss is cleared, so use .data instead. */ +int early_serial_base __section(".data"); + +#include "../early_serial_console.c" diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c new file mode 100644 index 000000000..6edd034b0 --- /dev/null +++ b/arch/x86/boot/compressed/efi.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for early access to EFI configuration table. + * + * Originally derived from arch/x86/boot/compressed/acpi.c + */ + +#include "misc.h" + +/** + * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment. + * + * @bp: pointer to boot_params + * + * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise. + */ +enum efi_type efi_get_type(struct boot_params *bp) +{ + struct efi_info *ei; + enum efi_type et; + const char *sig; + + ei = &bp->efi_info; + sig = (char *)&ei->efi_loader_signature; + + if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + et = EFI_TYPE_64; + } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) { + et = EFI_TYPE_32; + } else { + debug_putstr("No EFI environment detected.\n"); + et = EFI_TYPE_NONE; + } + +#ifndef CONFIG_X86_64 + /* + * Existing callers like acpi.c treat this case as an indicator to + * fall-through to non-EFI, rather than an error, so maintain that + * functionality here as well. + */ + if (ei->efi_systab_hi || ei->efi_memmap_hi) { + debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n"); + et = EFI_TYPE_NONE; + } +#endif + + return et; +} + +/** + * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address + * of the EFI system table. + * + * @bp: pointer to boot_params + * + * Return: EFI system table address on success. On error, return 0. + */ +unsigned long efi_get_system_table(struct boot_params *bp) +{ + unsigned long sys_tbl_pa; + struct efi_info *ei; + enum efi_type et; + + /* Get systab from boot params. */ + ei = &bp->efi_info; +#ifdef CONFIG_X86_64 + sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32); +#else + sys_tbl_pa = ei->efi_systab; +#endif + if (!sys_tbl_pa) { + debug_putstr("EFI system table not found."); + return 0; + } + + return sys_tbl_pa; +} + +/* + * EFI config table address changes to virtual address after boot, which may + * not be accessible for the kexec'd kernel. To address this, kexec provides + * the initial physical address via a struct setup_data entry, which is + * checked for here, along with some sanity checks. + */ +static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp, + enum efi_type et) +{ +#ifdef CONFIG_X86_64 + struct efi_setup_data *esd = NULL; + struct setup_data *data; + u64 pa_data; + + pa_data = bp->hdr.setup_data; + while (pa_data) { + data = (struct setup_data *)pa_data; + if (data->type == SETUP_EFI) { + esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); + break; + } + + pa_data = data->next; + } + + /* + * Original ACPI code falls back to attempting normal EFI boot in these + * cases, so maintain existing behavior by indicating non-kexec + * environment to the caller, but print them for debugging. + */ + if (esd && !esd->tables) { + debug_putstr("kexec EFI environment missing valid configuration table.\n"); + return NULL; + } + + return esd; +#endif + return NULL; +} + +/** + * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical + * address of EFI configuration table. + * + * @bp: pointer to boot_params + * @cfg_tbl_pa: location to store physical address of config table + * @cfg_tbl_len: location to store number of config table entries + * + * Return: 0 on success. On error, return params are left unchanged. + */ +int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len) +{ + unsigned long sys_tbl_pa; + enum efi_type et; + int ret; + + if (!cfg_tbl_pa || !cfg_tbl_len) + return -EINVAL; + + sys_tbl_pa = efi_get_system_table(bp); + if (!sys_tbl_pa) + return -EINVAL; + + /* Handle EFI bitness properly */ + et = efi_get_type(bp); + if (et == EFI_TYPE_64) { + efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa; + struct efi_setup_data *esd; + + /* kexec provides an alternative EFI conf table, check for it. */ + esd = get_kexec_setup_data(bp, et); + + *cfg_tbl_pa = esd ? esd->tables : stbl->tables; + *cfg_tbl_len = stbl->nr_tables; + } else if (et == EFI_TYPE_32) { + efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa; + + *cfg_tbl_pa = stbl->tables; + *cfg_tbl_len = stbl->nr_tables; + } else { + return -EINVAL; + } + + return 0; +} + +/* Get vendor table address/guid from EFI config table at the given index */ +static int get_vendor_table(void *cfg_tbl, unsigned int idx, + unsigned long *vendor_tbl_pa, + efi_guid_t *vendor_tbl_guid, + enum efi_type et) +{ + if (et == EFI_TYPE_64) { + efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx; + + if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) { + debug_putstr("Error: EFI config table entry located above 4GB.\n"); + return -EINVAL; + } + + *vendor_tbl_pa = tbl_entry->table; + *vendor_tbl_guid = tbl_entry->guid; + + } else if (et == EFI_TYPE_32) { + efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx; + + *vendor_tbl_pa = tbl_entry->table; + *vendor_tbl_guid = tbl_entry->guid; + } else { + return -EINVAL; + } + + return 0; +} + +/** + * efi_find_vendor_table - Given EFI config table, search it for the physical + * address of the vendor table associated with GUID. + * + * @bp: pointer to boot_params + * @cfg_tbl_pa: pointer to EFI configuration table + * @cfg_tbl_len: number of entries in EFI configuration table + * @guid: GUID of vendor table + * + * Return: vendor table address on success. On error, return 0. + */ +unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid) +{ + enum efi_type et; + unsigned int i; + + et = efi_get_type(bp); + if (et == EFI_TYPE_NONE) + return 0; + + for (i = 0; i < cfg_tbl_len; i++) { + unsigned long vendor_tbl_pa; + efi_guid_t vendor_tbl_guid; + int ret; + + ret = get_vendor_table((void *)cfg_tbl_pa, i, + &vendor_tbl_pa, + &vendor_tbl_guid, et); + if (ret) + return 0; + + if (!efi_guidcmp(guid, vendor_tbl_guid)) + return vendor_tbl_pa; + } + + return 0; +} diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h new file mode 100644 index 000000000..7db2f41b5 --- /dev/null +++ b/arch/x86/boot/compressed/efi.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_EFI_H +#define BOOT_COMPRESSED_EFI_H + +#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H) +#error Please do not include kernel proper namespace headers +#endif + +typedef guid_t efi_guid_t __aligned(__alignof__(u32)); + +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } + +#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) + +#define EFI32_LOADER_SIGNATURE "EL32" +#define EFI64_LOADER_SIGNATURE "EL64" + +/* + * Generic EFI table header + */ +typedef struct { + u64 signature; + u32 revision; + u32 headersize; + u32 crc32; + u32 reserved; +} efi_table_hdr_t; + +#define EFI_CONVENTIONAL_MEMORY 7 + +#define EFI_MEMORY_MORE_RELIABLE \ + ((u64)0x0000000000010000ULL) /* higher reliability */ +#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ + +#define EFI_PAGE_SHIFT 12 + +typedef struct { + u32 type; + u32 pad; + u64 phys_addr; + u64 virt_addr; + u64 num_pages; + u64 attribute; +} efi_memory_desc_t; + +#define efi_early_memdesc_ptr(map, desc_size, n) \ + (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size))) + +typedef struct { + efi_guid_t guid; + u64 table; +} efi_config_table_64_t; + +typedef struct { + efi_guid_t guid; + u32 table; +} efi_config_table_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + u32 __pad1; + u64 con_in_handle; + u64 con_in; + u64 con_out_handle; + u64 con_out; + u64 stderr_handle; + u64 stderr; + u64 runtime; + u64 boottime; + u32 nr_tables; + u32 __pad2; + u64 tables; +} efi_system_table_64_t; + +typedef struct { + efi_table_hdr_t hdr; + u32 fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + u32 con_in_handle; + u32 con_in; + u32 con_out_handle; + u32 con_out; + u32 stderr_handle; + u32 stderr; + u32 runtime; + u32 boottime; + u32 nr_tables; + u32 tables; +} efi_system_table_32_t; + +/* kexec external ABI */ +struct efi_setup_data { + u64 fw_vendor; + u64 __unused; + u64 tables; + u64 smbios; + u64 reserved[8]; +}; + +static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) +{ + return memcmp(&left, &right, sizeof (efi_guid_t)); +} + +#ifdef CONFIG_EFI +bool __pure __efi_soft_reserve_enabled(void); + +static inline bool __pure efi_soft_reserve_enabled(void) +{ + return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) + && __efi_soft_reserve_enabled(); +} +#else +static inline bool efi_soft_reserve_enabled(void) +{ + return false; +} +#endif /* CONFIG_EFI */ +#endif /* BOOT_COMPRESSED_EFI_H */ diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S new file mode 100644 index 000000000..67e7edcdf --- /dev/null +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT and IDT before we make EFI service + * calls. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identity + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include +#include +#include +#include +#include + + .code64 + .text +SYM_FUNC_START(__efi64_thunk) + push %rbp + push %rbx + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* Copy args passed on stack */ + movq 0x30(%rsp), %rbp + movq 0x38(%rsp), %rbx + movq 0x40(%rsp), %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $64, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) + movl %ebp, 0x14(%rsp) + movl %ebx, 0x18(%rsp) + movl %eax, 0x1c(%rsp) + + leaq 0x20(%rsp), %rbx + sgdt (%rbx) + + addq $16, %rbx + sidt (%rbx) + + leaq 1f(%rip), %rbp + + /* + * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT + * and IDT that was installed when the kernel started executing. The + * pointers were saved at the EFI stub entry point in head_64.S. + * + * Pass the saved DS selector to the 32-bit code, and use far return to + * restore the saved CS selector. + */ + leaq efi32_boot_idt(%rip), %rax + lidt (%rax) + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + + movzwl efi32_boot_ds(%rip), %edx + movzwq efi32_boot_cs(%rip), %rax + pushq %rax + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $64, %rsp + movq %rdi, %rax + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* Clear out 32-bit selector from FS and GS */ + xorl %ebx, %ebx + movl %ebx, %fs + movl %ebx, %gs + + /* + * Convert 32-bit status code into 64-bit. + */ + roll $1, %eax + rorq $1, %rax + + pop %rbx + pop %rbp + RET +SYM_FUNC_END(__efi64_thunk) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +SYM_FUNC_START_LOCAL(efi_enter32) + /* Load firmware selector into data and stack segment registers */ + movl %edx, %ds + movl %edx, %es + movl %edx, %fs + movl %edx, %gs + movl %edx, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs and IDTs. + */ + cli + + lidtl (%ebx) + subl $16, %ebx + + lgdtl (%ebx) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + pushl $__KERNEL_CS + pushl %ebp + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +SYM_FUNC_END(efi_enter32) + + .data + .balign 8 +SYM_DATA_START(efi32_boot_gdt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_gdt) + +SYM_DATA_START(efi32_boot_idt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_idt) + +SYM_DATA_START(efi32_boot_cs) + .word 0 +SYM_DATA_END(efi32_boot_cs) + +SYM_DATA_START(efi32_boot_ds) + .word 0 +SYM_DATA_END(efi32_boot_ds) diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c new file mode 100644 index 000000000..c881878e5 --- /dev/null +++ b/arch/x86/boot/compressed/error.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Callers outside of misc.c need access to the error reporting routines, + * but the *_putstr() functions need to stay in misc.c because of how + * memcpy() and memmove() are defined for the compressed boot environment. + */ +#include "misc.h" +#include "error.h" + +void warn(char *m) +{ + error_putstr("\n\n"); + error_putstr(m); + error_putstr("\n\n"); +} + +void error(char *m) +{ + warn(m); + error_putstr(" -- System halted"); + + while (1) + asm("hlt"); +} diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h new file mode 100644 index 000000000..1de582118 --- /dev/null +++ b/arch/x86/boot/compressed/error.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_ERROR_H +#define BOOT_COMPRESSED_ERROR_H + +#include + +void warn(char *m); +void error(char *m) __noreturn; + +#endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S new file mode 100644 index 000000000..3b354eb95 --- /dev/null +++ b/arch/x86/boot/compressed/head_32.S @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .text + +#include +#include +#include +#include +#include +#include +#include + +/* + * These symbols needed to be marked as .hidden to prevent the BFD linker from + * generating R_386_32 (rather than R_386_RELATIVE) relocations for them when + * the 32-bit compressed kernel is linked as PIE. This is no longer necessary, + * but it doesn't hurt to keep them .hidden. + */ + .hidden _bss + .hidden _ebss + .hidden _end + + __HEAD +SYM_FUNC_START(startup_32) + cld + cli + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %edx + addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %edx + + /* Load new GDT */ + leal gdt@GOTOFF(%edx), %eax + movl %eax, 2(%eax) + lgdt (%eax) + + /* Load segment registers with our descriptors */ + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss + +/* + * %edx contains the address we are loaded at by the boot loader (plus the + * offset to the GOT). The below code calculates %ebx to be the address where + * we should move the kernel image temporarily for safe in-place decompression + * (again, plus the offset to the GOT). + * + * %ebp is calculated to be the address that the kernel will be decompressed to. + */ + +#ifdef CONFIG_RELOCATABLE + leal startup_32@GOTOFF(%edx), %ebx + +#ifdef CONFIG_EFI_STUB +/* + * If we were loaded via the EFI LoadImage service, startup_32() will be at an + * offset to the start of the space allocated for the image. efi_pe_entry() will + * set up image_offset to tell us where the image actually starts, so that we + * can use the full available buffer. + * image_offset = startup_32 - image_base + * Otherwise image_offset will be zero and has no effect on the calculations. + */ + subl image_offset@GOTOFF(%edx), %ebx +#endif + + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jae 1f +#endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: + + movl %ebx, %ebp // Save the output address for later + /* Target address to relocate to for decompression */ + addl BP_init_size(%esi), %ebx + subl $_end@GOTOFF, %ebx + + /* Set up the stack */ + leal boot_stack_end@GOTOFF(%ebx), %esp + + /* Zero EFLAGS */ + pushl $0 + popfl + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal (_bss@GOTOFF-4)(%edx), %esi + leal (_bss@GOTOFF-4)(%ebx), %edi + movl $(_bss - startup_32), %ecx + shrl $2, %ecx + std + rep movsl + cld + popl %esi + + /* + * The GDT may get overwritten either during the copy we just did or + * during extract_kernel below. To avoid any issues, repoint the GDTR + * to the new copy of the GDT. + */ + leal gdt@GOTOFF(%ebx), %eax + movl %eax, 2(%eax) + lgdt (%eax) + +/* + * Jump to the relocated address. + */ + leal .Lrelocated@GOTOFF(%ebx), %eax + jmp *%eax +SYM_FUNC_END(startup_32) + +#ifdef CONFIG_EFI_STUB +SYM_FUNC_START(efi32_stub_entry) + add $0x4, %esp + movl 8(%esp), %esi /* save boot_params pointer */ + call efi_main + /* efi_main returns the possibly relocated address of startup_32 */ + jmp *%eax +SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) +#endif + + .text +SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leal _bss@GOTOFF(%ebx), %edi + leal _ebss@GOTOFF(%ebx), %ecx + subl %edi, %ecx + shrl $2, %ecx + rep stosl + +/* + * Do the extraction, and jump to the new kernel.. + */ + /* push arguments for extract_kernel: */ + + pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ + pushl %ebp /* output address */ + pushl input_len@GOTOFF(%ebx) /* input_len */ + leal input_data@GOTOFF(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap@GOTOFF(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call extract_kernel /* returns kernel location in %eax */ + addl $24, %esp + +/* + * Jump to the extracted kernel. + */ + xorl %ebx, %ebx + jmp *%eax +SYM_FUNC_END(.Lrelocated) + + .data + .balign 8 +SYM_DATA_START_LOCAL(gdt) + .word gdt_end - gdt - 1 + .long 0 + .word 0 + .quad 0x0000000000000000 /* Reserved */ + .quad 0x00cf9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ +SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) + +#ifdef CONFIG_EFI_STUB +SYM_DATA(image_offset, .long 0) +#endif + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S new file mode 100644 index 000000000..b4bd6df29 --- /dev/null +++ b/arch/x86/boot/compressed/head_64.S @@ -0,0 +1,1022 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .code32 + .text + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pgtable.h" + +/* + * Locally defined symbols should be marked hidden: + */ + .hidden _bss + .hidden _ebss + .hidden _end + + __HEAD + +/* + * This macro gives the relative virtual address of X, i.e. the offset of X + * from startup_32. This is the same as the link-time virtual address of X, + * since startup_32 is at 0, but defining it this way tells the + * assembler/linker that we do not want the actual run-time address of X. This + * prevents the linker from trying to create unwanted run-time relocation + * entries for the reference when the compressed kernel is linked as PIE. + * + * A reference X(%reg) will result in the link-time VA of X being stored with + * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that + * adds the 64-bit base address where the kernel is loaded. + * + * Replacing it with (X-startup_32)(%reg) results in the offset being stored, + * and no run-time relocation. + * + * The macro should be used as a displacement with a base register containing + * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate + * [$ rva(X)]. + * + * This macro can only be used from within the .head.text section, since the + * expression requires startup_32 to be in the same section as the code being + * assembled. + */ +#define rva(X) ((X) - startup_32) + + .code32 +SYM_FUNC_START(startup_32) + /* + * 32bit entry is 0 and it is ABI so immutable! + * If we come here directly from a bootloader, + * kernel(text+data+bss+brk) ramdisk, zero_page, command line + * all need to be under the 4G limit. + */ + cld + cli + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $ rva(1b), %ebp + + /* Load new GDT with the 64bit segments using 32bit descriptor */ + leal rva(gdt)(%ebp), %eax + movl %eax, 2(%eax) + lgdt (%eax) + + /* Load segment registers with our descriptors */ + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss + + /* Setup a stack and load CS from current GDT */ + leal rva(boot_stack_end)(%ebp), %esp + + pushl $__KERNEL32_CS + leal rva(1f)(%ebp), %eax + pushl %eax + lretl +1: + + /* Setup Exception handling for SEV-ES */ + call startup32_load_idt + + /* Make sure cpu supports long mode. */ + call verify_cpu + testl %eax, %eax + jnz .Lno_longmode + +/* + * Compute the delta between where we were compiled to run at + * and where the code will actually run at. + * + * %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + +#ifdef CONFIG_EFI_STUB +/* + * If we were loaded via the EFI LoadImage service, startup_32 will be at an + * offset to the start of the space allocated for the image. efi_pe_entry will + * set up image_offset to tell us where the image actually starts, so that we + * can use the full available buffer. + * image_offset = startup_32 - image_base + * Otherwise image_offset will be zero and has no effect on the calculations. + */ + subl rva(image_offset)(%ebp), %ebx +#endif + + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jae 1f +#endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: + + /* Target address to relocate to for decompression */ + addl BP_init_size(%esi), %ebx + subl $ rva(_end), %ebx + +/* + * Prepare for entering 64 bit mode + */ + + /* Enable PAE mode */ + movl %cr4, %eax + orl $X86_CR4_PAE, %eax + movl %eax, %cr4 + + /* + * Build early 4G boot pagetable + */ + /* + * If SEV is active then set the encryption mask in the page tables. + * This will insure that when the kernel is copied and decompressed + * it will be done so encrypted. + */ + call get_sev_encryption_bit + xorl %edx, %edx +#ifdef CONFIG_AMD_MEM_ENCRYPT + testl %eax, %eax + jz 1f + subl $32, %eax /* Encryption bit is always above bit 31 */ + bts %eax, %edx /* Set encryption mask for page tables */ + /* + * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that + * startup32_check_sev_cbit() will do a check. sev_enable() will + * initialize sev_status with all the bits reported by + * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT + * needs to be set for now. + */ + movl $1, rva(sev_status)(%ebp) +1: +#endif + + /* Initialize Page tables to 0 */ + leal rva(pgtable)(%ebx), %edi + xorl %eax, %eax + movl $(BOOT_INIT_PGT_SIZE/4), %ecx + rep stosl + + /* Build Level 4 */ + leal rva(pgtable + 0)(%ebx), %edi + leal 0x1007 (%edi), %eax + movl %eax, 0(%edi) + addl %edx, 4(%edi) + + /* Build Level 3 */ + leal rva(pgtable + 0x1000)(%ebx), %edi + leal 0x1007(%edi), %eax + movl $4, %ecx +1: movl %eax, 0x00(%edi) + addl %edx, 0x04(%edi) + addl $0x00001000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Build Level 2 */ + leal rva(pgtable + 0x2000)(%ebx), %edi + movl $0x00000183, %eax + movl $2048, %ecx +1: movl %eax, 0(%edi) + addl %edx, 4(%edi) + addl $0x00200000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Enable the boot page tables */ + leal rva(pgtable)(%ebx), %eax + movl %eax, %cr3 + + /* Enable Long mode in EFER (Extended Feature Enable Register) */ + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* After gdt is loaded */ + xorl %eax, %eax + lldt %ax + movl $__BOOT_TSS, %eax + ltr %ax + + /* + * Setup for the jump to 64bit mode + * + * When the jump is performed we will be in long mode but + * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 + * (and in turn EFER.LMA = 1). To jump into 64bit mode we use + * the new gdt/idt that has __KERNEL_CS with CS.L = 1. + * We place all of the values on our mini stack so lret can + * used to perform that far jump. + */ + leal rva(startup_64)(%ebp), %eax +#ifdef CONFIG_EFI_MIXED + movl rva(efi32_boot_args)(%ebp), %edi + testl %edi, %edi + jz 1f + leal rva(efi64_stub_entry)(%ebp), %eax + movl rva(efi32_boot_args+4)(%ebp), %esi + movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer + testl %edx, %edx + jnz 1f + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + subl $40, %esp + leal rva(efi_pe_entry)(%ebp), %eax + movl %edi, %ecx // MS calling convention + movl %esi, %edx +1: +#endif + /* Check if the C-bit position is correct when SEV is active */ + call startup32_check_sev_cbit + + pushl $__KERNEL_CS + pushl %eax + + /* Enter paged protected Mode, activating Long Mode */ + movl $CR0_STATE, %eax + movl %eax, %cr0 + + /* Jump from 32bit compatibility mode into 64bit mode. */ + lret +SYM_FUNC_END(startup_32) + +#ifdef CONFIG_EFI_MIXED + .org 0x190 +SYM_FUNC_START(efi32_stub_entry) + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + + call 1f +1: pop %ebp + subl $ rva(1b), %ebp + + movl %esi, rva(efi32_boot_args+8)(%ebp) +SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) + movl %ecx, rva(efi32_boot_args)(%ebp) + movl %edx, rva(efi32_boot_args+4)(%ebp) + movb $0, rva(efi_is64)(%ebp) + + /* Save firmware GDTR and code/data selectors */ + sgdtl rva(efi32_boot_gdt)(%ebp) + movw %cs, rva(efi32_boot_cs)(%ebp) + movw %ds, rva(efi32_boot_ds)(%ebp) + + /* Store firmware IDT descriptor */ + sidtl rva(efi32_boot_idt)(%ebp) + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + jmp startup_32 +SYM_FUNC_END(efi32_stub_entry) +#endif + + .code64 + .org 0x200 +SYM_CODE_START(startup_64) + /* + * 64bit entry is 0x200 and it is ABI so immutable! + * We come here either from startup_32 or directly from a + * 64bit bootloader. + * If we come here from a bootloader, kernel(text+data+bss+brk), + * ramdisk, zero_page, command line could be above 4G. + * We depend on an identity mapped page table being provided + * that maps our entire kernel(text+data+bss+brk), zero page + * and command line. + */ + + cld + cli + + /* Setup data segments. */ + xorl %eax, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + /* + * Compute the decompressed kernel start address. It is where + * we were loaded at aligned to a 2M boundary. %rbp contains the + * decompressed kernel start address. + * + * If it is a relocatable kernel then decompress and run the kernel + * from load address aligned to 2MB addr, otherwise decompress and + * run the kernel from LOAD_PHYSICAL_ADDR + * + * We cannot rely on the calculation done in 32-bit mode, since we + * may have been invoked via the 64-bit entry point. + */ + + /* Start with the delta to where the kernel will run at. */ +#ifdef CONFIG_RELOCATABLE + leaq startup_32(%rip) /* - $startup_32 */, %rbp + +#ifdef CONFIG_EFI_STUB +/* + * If we were loaded via the EFI LoadImage service, startup_32 will be at an + * offset to the start of the space allocated for the image. efi_pe_entry will + * set up image_offset to tell us where the image actually starts, so that we + * can use the full available buffer. + * image_offset = startup_32 - image_base + * Otherwise image_offset will be zero and has no effect on the calculations. + */ + movl image_offset(%rip), %eax + subq %rax, %rbp +#endif + + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp + notq %rax + andq %rax, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jae 1f +#endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: + + /* Target address to relocate to for decompression */ + movl BP_init_size(%rsi), %ebx + subl $ rva(_end), %ebx + addq %rbp, %rbx + + /* Set up the stack */ + leaq rva(boot_stack_end)(%rbx), %rsp + + /* + * At this point we are in long mode with 4-level paging enabled, + * but we might want to enable 5-level paging or vice versa. + * + * The problem is that we cannot do it directly. Setting or clearing + * CR4.LA57 in long mode would trigger #GP. So we need to switch off + * long mode and paging first. + * + * We also need a trampoline in lower memory to switch over from + * 4- to 5-level paging for cases when the bootloader puts the kernel + * above 4G, but didn't enable 5-level paging for us. + * + * The same trampoline can be used to switch from 5- to 4-level paging + * mode, like when starting 4-level paging kernel via kexec() when + * original kernel worked in 5-level paging mode. + * + * For the trampoline, we need the top page table to reside in lower + * memory as we don't have a way to load 64-bit values into CR3 in + * 32-bit mode. + * + * We go though the trampoline even if we don't have to: if we're + * already in a desired paging mode. This way the trampoline code gets + * tested on every boot. + */ + + /* Make sure we have GDT with 32-bit code segment */ + leaq gdt64(%rip), %rax + addq %rax, 2(%rax) + lgdt (%rax) + + /* Reload CS so IRET returns to a CS actually in the GDT */ + pushq $__KERNEL_CS + leaq .Lon_kernel_cs(%rip), %rax + pushq %rax + lretq + +.Lon_kernel_cs: + + pushq %rsi + call load_stage1_idt + popq %rsi + +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Now that the stage1 interrupt handlers are set up, #VC exceptions from + * CPUID instructions can be properly handled for SEV-ES guests. + * + * For SEV-SNP, the CPUID table also needs to be set up in advance of any + * CPUID instructions being issued, so go ahead and do that now via + * sev_enable(), which will also handle the rest of the SEV-related + * detection/setup to ensure that has been done in advance of any dependent + * code. + */ + pushq %rsi + movq %rsi, %rdi /* real mode address */ + call sev_enable + popq %rsi +#endif + + /* + * paging_prepare() sets up the trampoline and checks if we need to + * enable 5-level paging. + * + * paging_prepare() returns a two-quadword structure which lands + * into RDX:RAX: + * - Address of the trampoline is returned in RAX. + * - Non zero RDX means trampoline needs to enable 5-level + * paging. + * + * RSI holds real mode data and needs to be preserved across + * this function call. + */ + pushq %rsi + movq %rsi, %rdi /* real mode address */ + call paging_prepare + popq %rsi + + /* Save the trampoline address in RCX */ + movq %rax, %rcx + + /* Set up 32-bit addressable stack */ + leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp + + /* + * Preserve live 64-bit registers on the stack: this is necessary + * because the architecture does not guarantee that GPRs will retain + * their full 64-bit values across a 32-bit mode switch. + */ + pushq %rbp + pushq %rbx + pushq %rsi + + /* + * Push the 64-bit address of trampoline_return() onto the new stack. + * It will be used by the trampoline to return to the main code. Due to + * the 32-bit mode switch, it cannot be kept it in a register either. + */ + leaq trampoline_return(%rip), %rdi + pushq %rdi + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax + pushq %rax + lretq +trampoline_return: + /* Restore live 64-bit registers */ + popq %rsi + popq %rbx + popq %rbp + + /* Restore the stack, the 32-bit trampoline uses its own stack */ + leaq rva(boot_stack_end)(%rbx), %rsp + + /* + * cleanup_trampoline() would restore trampoline memory. + * + * RDI is address of the page table to use instead of page table + * in trampoline memory (if required). + * + * RSI holds real mode data and needs to be preserved across + * this function call. + */ + pushq %rsi + leaq rva(top_pgtable)(%rbx), %rdi + call cleanup_trampoline + popq %rsi + + /* Zero EFLAGS */ + pushq $0 + popfq + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq rva(_bss-8)(%rbx), %rdi + movl $(_bss - startup_32), %ecx + shrl $3, %ecx + std + rep movsq + cld + popq %rsi + + /* + * The GDT may get overwritten either during the copy we just did or + * during extract_kernel below. To avoid any issues, repoint the GDTR + * to the new copy of the GDT. + */ + leaq rva(gdt64)(%rbx), %rax + leaq rva(gdt)(%rbx), %rdx + movq %rdx, 2(%rax) + lgdt (%rax) + +/* + * Jump to the relocated address. + */ + leaq rva(.Lrelocated)(%rbx), %rax + jmp *%rax +SYM_CODE_END(startup_64) + +#ifdef CONFIG_EFI_STUB + .org 0x390 +SYM_FUNC_START(efi64_stub_entry) + and $~0xf, %rsp /* realign the stack */ + movq %rdx, %rbx /* save boot_params pointer */ + call efi_main + movq %rbx,%rsi + leaq rva(startup_64)(%rax), %rax + jmp *%rax +SYM_FUNC_END(efi64_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) +#endif + + .text +SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leaq _bss(%rip), %rdi + leaq _ebss(%rip), %rcx + subq %rdi, %rcx + shrq $3, %rcx + rep stosq + + pushq %rsi + call load_stage2_idt + + /* Pass boot_params to initialize_identity_maps() */ + movq (%rsp), %rdi + call initialize_identity_maps + popq %rsi + +/* + * Do the extraction, and jump to the new kernel.. + */ + pushq %rsi /* Save the real mode argument */ + movq %rsi, %rdi /* real mode address */ + leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ + leaq input_data(%rip), %rdx /* input_data */ + movl input_len(%rip), %ecx /* input_len */ + movq %rbp, %r8 /* output target address */ + movl output_len(%rip), %r9d /* decompressed length, end of relocs */ + call extract_kernel /* returns kernel location in %rax */ + popq %rsi + +/* + * Jump to the decompressed kernel. + */ + jmp *%rax +SYM_FUNC_END(.Lrelocated) + + .code32 +/* + * This is the 32-bit trampoline that will be copied over to low memory. + * + * Return address is at the top of the stack (might be above 4G). + * ECX contains the base address of the trampoline memory. + * Non zero RDX means trampoline needs to enable 5-level paging. + */ +SYM_CODE_START(trampoline_32bit_src) + /* Set up data and stack segments */ + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %ss + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Check what paging mode we want to be in after the trampoline */ + testl %edx, %edx + jz 1f + + /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ + movl %cr4, %eax + testl $X86_CR4_LA57, %eax + jnz 3f + jmp 2f +1: + /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ + movl %cr4, %eax + testl $X86_CR4_LA57, %eax + jz 3f +2: + /* Point CR3 to the trampoline's new top level page table */ + leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax + movl %eax, %cr3 +3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ + pushl %ecx + pushl %edx + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + /* Avoid writing EFER if no change was made (for TDX guest) */ + jc 1f + wrmsr +1: popl %edx + popl %ecx + +#ifdef CONFIG_X86_MCE + /* + * Preserve CR4.MCE if the kernel will enable #MC support. + * Clearing MCE may fault in some environments (that also force #MC + * support). Any machine check that occurs before #MC support is fully + * configured will crash the system regardless of the CR4.MCE value set + * here. + */ + movl %cr4, %eax + andl $X86_CR4_MCE, %eax +#else + movl $0, %eax +#endif + + /* Enable PAE and LA57 (if required) paging modes */ + orl $X86_CR4_PAE, %eax + testl %edx, %edx + jz 1f + orl $X86_CR4_LA57, %eax +1: + movl %eax, %cr4 + + /* Calculate address of paging_enabled() once we are executing in the trampoline */ + leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax + + /* Prepare the stack for far return to Long Mode */ + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging again. */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + lret +SYM_CODE_END(trampoline_32bit_src) + + .code64 +SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) + /* Return from the trampoline */ + retq +SYM_FUNC_END(.Lpaging_enabled) + + /* + * The trampoline code has a size limit. + * Make sure we fail to compile if the trampoline code grows + * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. + */ + .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE + + .code32 +SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) + /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ +1: + hlt + jmp 1b +SYM_FUNC_END(.Lno_longmode) + +#include "../../kernel/verify_cpu.S" + + .data +SYM_DATA_START_LOCAL(gdt64) + .word gdt_end - gdt - 1 + .quad gdt - gdt64 +SYM_DATA_END(gdt64) + .balign 8 +SYM_DATA_START_LOCAL(gdt) + .word gdt_end - gdt - 1 + .long 0 + .word 0 + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) + +SYM_DATA_START(boot_idt_desc) + .word boot_idt_end - boot_idt - 1 + .quad 0 +SYM_DATA_END(boot_idt_desc) + .balign 8 +SYM_DATA_START(boot_idt) + .rept BOOT_IDT_ENTRIES + .quad 0 + .quad 0 + .endr +SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) + +#ifdef CONFIG_AMD_MEM_ENCRYPT +SYM_DATA_START(boot32_idt_desc) + .word boot32_idt_end - boot32_idt - 1 + .long 0 +SYM_DATA_END(boot32_idt_desc) + .balign 8 +SYM_DATA_START(boot32_idt) + .rept 32 + .quad 0 + .endr +SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) +#endif + +#ifdef CONFIG_EFI_STUB +SYM_DATA(image_offset, .long 0) +#endif +#ifdef CONFIG_EFI_MIXED +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi_is64, .byte 1) + +#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) +#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) +#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) + + __HEAD + .code32 +SYM_FUNC_START(efi32_pe_entry) +/* + * efi_status_t efi32_pe_entry(efi_handle_t image_handle, + * efi_system_table_32_t *sys_table) + */ + + pushl %ebp + movl %esp, %ebp + pushl %eax // dummy push to allocate loaded_image + + pushl %ebx // save callee-save registers + pushl %edi + + call verify_cpu // check for long mode support + testl %eax, %eax + movl $0x80000003, %eax // EFI_UNSUPPORTED + jnz 2f + + call 1f +1: pop %ebx + subl $ rva(1b), %ebx + + /* Get the loaded image protocol pointer from the image handle */ + leal -4(%ebp), %eax + pushl %eax // &loaded_image + leal rva(loaded_image_proto)(%ebx), %eax + pushl %eax // pass the GUID address + pushl 8(%ebp) // pass the image handle + + /* + * Note the alignment of the stack frame. + * sys_table + * handle <-- 16-byte aligned on entry by ABI + * return address + * frame pointer + * loaded_image <-- local variable + * saved %ebx <-- 16-byte aligned here + * saved %edi + * &loaded_image + * &loaded_image_proto + * handle <-- 16-byte aligned for call to handle_protocol + */ + + movl 12(%ebp), %eax // sys_table + movl ST32_boottime(%eax), %eax // sys_table->boottime + call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol + addl $12, %esp // restore argument space + testl %eax, %eax + jnz 2f + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table + movl -4(%ebp), %esi // loaded_image + movl LI32_image_base(%esi), %esi // loaded_image->image_base + movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry + /* + * We need to set the image_offset variable here since startup_32() will + * use it before we get to the 64-bit efi_pe_entry() in C code. + */ + subl %esi, %ebx + movl %ebx, rva(image_offset)(%ebp) // save image_offset + jmp efi32_pe_stub_entry + +2: popl %edi // restore callee-save registers + popl %ebx + leave + RET +SYM_FUNC_END(efi32_pe_entry) + + .section ".rodata" + /* EFI loaded image protocol GUID */ + .balign 4 +SYM_DATA_START_LOCAL(loaded_image_proto) + .long 0x5b1b31a1 + .word 0x9562, 0x11d2 + .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b +SYM_DATA_END(loaded_image_proto) +#endif + +#ifdef CONFIG_AMD_MEM_ENCRYPT + __HEAD + .code32 +/* + * Write an IDT entry into boot32_idt + * + * Parameters: + * + * %eax: Handler address + * %edx: Vector number + * + * Physical offset is expected in %ebp + */ +SYM_FUNC_START(startup32_set_idt_entry) + push %ebx + push %ecx + + /* IDT entry address to %ebx */ + leal rva(boot32_idt)(%ebp), %ebx + shl $3, %edx + addl %edx, %ebx + + /* Build IDT entry, lower 4 bytes */ + movl %eax, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + movl $__KERNEL32_CS, %ecx # Target code segment selector + shl $16, %ecx + orl %ecx, %edx + + /* Store lower 4 bytes to IDT */ + movl %edx, (%ebx) + + /* Build IDT entry, upper 4 bytes */ + movl %eax, %edx + andl $0xffff0000, %edx # Target code segment offset [31:16] + orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate + + /* Store upper 4 bytes to IDT */ + movl %edx, 4(%ebx) + + pop %ecx + pop %ebx + RET +SYM_FUNC_END(startup32_set_idt_entry) +#endif + +SYM_FUNC_START(startup32_load_idt) +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* #VC handler */ + leal rva(startup32_vc_handler)(%ebp), %eax + movl $X86_TRAP_VC, %edx + call startup32_set_idt_entry + + /* Load IDT */ + leal rva(boot32_idt)(%ebp), %eax + movl %eax, rva(boot32_idt_desc+2)(%ebp) + lidt rva(boot32_idt_desc)(%ebp) +#endif + RET +SYM_FUNC_END(startup32_load_idt) + +/* + * Check for the correct C-bit position when the startup_32 boot-path is used. + * + * The check makes use of the fact that all memory is encrypted when paging is + * disabled. The function creates 64 bits of random data using the RDRAND + * instruction. RDRAND is mandatory for SEV guests, so always available. If the + * hypervisor violates that the kernel will crash right here. + * + * The 64 bits of random data are stored to a memory location and at the same + * time kept in the %eax and %ebx registers. Since encryption is always active + * when paging is off the random data will be stored encrypted in main memory. + * + * Then paging is enabled. When the C-bit position is correct all memory is + * still mapped encrypted and comparing the register values with memory will + * succeed. An incorrect C-bit position will map all memory unencrypted, so that + * the compare will use the encrypted random data and fail. + */ +SYM_FUNC_START(startup32_check_sev_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + + /* Check for non-zero sev_status */ + movl rva(sev_status)(%ebp), %eax + testl %eax, %eax + jz 4f + + /* + * Get two 32-bit random values - Don't bail out if RDRAND fails + * because it is better to prevent forward progress if no random value + * can be gathered. + */ +1: rdrand %eax + jnc 1b +2: rdrand %ebx + jnc 2b + + /* Store to memory and keep it in the registers */ + movl %eax, rva(sev_check_data)(%ebp) + movl %ebx, rva(sev_check_data+4)(%ebp) + + /* Enable paging to see if encryption is active */ + movl %cr0, %edx /* Backup %cr0 in %edx */ + movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ + movl %ecx, %cr0 + + cmpl %eax, rva(sev_check_data)(%ebp) + jne 3f + cmpl %ebx, rva(sev_check_data+4)(%ebp) + jne 3f + + movl %edx, %cr0 /* Restore previous %cr0 */ + + jmp 4f + +3: /* Check failed - hlt the machine */ + hlt + jmp 3b + +4: + popl %edx + popl %ecx + popl %ebx + popl %eax +#endif + RET +SYM_FUNC_END(startup32_check_sev_cbit) + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) + +SYM_DATA_START_LOCAL(boot_stack) + .fill BOOT_STACK_SIZE, 1, 0 + .balign 16 +SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end) + +/* + * Space for page tables (not in .bss so not zeroed) + */ + .section ".pgtable","aw",@nobits + .balign 4096 +SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0) + +/* + * The page table is going to be used instead of page table in the trampoline + * memory. + */ +SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c new file mode 100644 index 000000000..b4155273d --- /dev/null +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code is used on x86_64 to create page table identity mappings on + * demand by building up a new set of page tables (or appending to the + * existing ones), and then switching over to them when ready. + * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook + */ + +/* + * Since we're dealing with identity mappings, physical and virtual + * addresses are the same, so override these defines which are ultimately + * used by the headers in misc.h. + */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + +#include "error.h" +#include "misc.h" + +/* These actually do the work of building the kernel identity maps. */ +#include +#include +#include +#include +#include +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#include "../../mm/ident_map.c" + +#define _SETUP +#include /* For COMMAND_LINE_SIZE */ +#undef _SETUP + +extern unsigned long get_cmd_line_ptr(void); + +/* Used by PAGE_KERN* macros: */ +pteval_t __default_kernel_pte_mask __read_mostly = ~0; + +/* Used to track our page table allocation area. */ +struct alloc_pgt_data { + unsigned char *pgt_buf; + unsigned long pgt_buf_size; + unsigned long pgt_buf_offset; +}; + +/* + * Allocates space for a page table entry, using struct alloc_pgt_data + * above. Besides the local callers, this is used as the allocation + * callback in mapping_info below. + */ +static void *alloc_pgt_page(void *context) +{ + struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; + unsigned char *entry; + + /* Validate there is space available for a new page. */ + if (pages->pgt_buf_offset >= pages->pgt_buf_size) { + debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + return NULL; + } + + /* Consumed more tables than expected? */ + if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) { + debug_putstr("pgt_buf running low in " __FILE__ "\n"); + debug_putstr("Need to raise BOOT_PGT_SIZE?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + } + + entry = pages->pgt_buf + pages->pgt_buf_offset; + pages->pgt_buf_offset += PAGE_SIZE; + + return entry; +} + +/* Used to track our allocated page tables. */ +static struct alloc_pgt_data pgt_data; + +/* The top level page table entry pointer. */ +static unsigned long top_level_pgt; + +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; + +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info; + +/* + * Adds the specified range to the identity mappings. + */ +void kernel_add_identity_map(unsigned long start, unsigned long end) +{ + int ret; + + /* Align boundary to 2M. */ + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + if (start >= end) + return; + + /* Build the mapping. */ + ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); + if (ret) + error("Error: kernel_ident_mapping_init() failed\n"); +} + +/* Locates and clears a region for a new top level page table. */ +void initialize_identity_maps(void *rmode) +{ + unsigned long cmdline; + struct setup_data *sd; + + /* Exclude the encryption mask from __PHYSICAL_MASK */ + physical_mask &= ~sme_me_mask; + + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; + mapping_info.kernpg_flag = _KERNPG_TABLE; + + /* + * It should be impossible for this not to already be true, + * but since calling this a second time would rewind the other + * counters, let's just make sure this is reset too. + */ + pgt_data.pgt_buf_offset = 0; + + /* + * If we came here via startup_32(), cr3 will be _pgtable already + * and we must append to the existing area instead of entirely + * overwriting it. + * + * With 5-level paging, we use '_pgtable' to allocate the p4d page table, + * the top-level page table is allocated separately. + * + * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level + * cases. On 4-level paging it's equal to 'top_level_pgt'. + */ + top_level_pgt = read_cr3_pa(); + if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { + pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + } else { + pgt_data.pgt_buf = _pgtable; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); + } + + /* + * New page-table is set up - map the kernel image, boot_params and the + * command line. The uncompressed kernel requires boot_params and the + * command line to be mapped in the identity mapping. Map them + * explicitly here in case the compressed kernel does not touch them, + * or does not touch all the pages covering them. + */ + kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); + boot_params = rmode; + kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + cmdline = get_cmd_line_ptr(); + kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + + /* + * Also map the setup_data entries passed via boot_params in case they + * need to be accessed by uncompressed kernel via the identity mapping. + */ + sd = (struct setup_data *)boot_params->hdr.setup_data; + while (sd) { + unsigned long sd_addr = (unsigned long)sd; + + kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len); + sd = (struct setup_data *)sd->next; + } + + sev_prep_identity_maps(top_level_pgt); + + /* Load the new page-table. */ + write_cr3(top_level_pgt); + + /* + * Now that the required page table mappings are established and a + * GHCB can be used, check for SNP guest/HV feature compatibility. + */ + snp_check_features(); +} + +static pte_t *split_large_pmd(struct x86_mapping_info *info, + pmd_t *pmdp, unsigned long __address) +{ + unsigned long page_flags; + unsigned long address; + pte_t *pte; + pmd_t pmd; + int i; + + pte = (pte_t *)info->alloc_pgt_page(info->context); + if (!pte) + return NULL; + + address = __address & PMD_MASK; + /* No large page - clear PSE flag */ + page_flags = info->page_flag & ~_PAGE_PSE; + + /* Populate the PTEs */ + for (i = 0; i < PTRS_PER_PMD; i++) { + set_pte(&pte[i], __pte(address | page_flags)); + address += PAGE_SIZE; + } + + /* + * Ideally we need to clear the large PMD first and do a TLB + * flush before we write the new PMD. But the 2M range of the + * PMD might contain the code we execute and/or the stack + * we are on, so we can't do that. But that should be safe here + * because we are going from large to small mappings and we are + * also the only user of the page-table, so there is no chance + * of a TLB multihit. + */ + pmd = __pmd((unsigned long)pte | info->kernpg_flag); + set_pmd(pmdp, pmd); + /* Flush TLB to establish the new PMD */ + write_cr3(top_level_pgt); + + return pte + pte_index(__address); +} + +static void clflush_page(unsigned long address) +{ + unsigned int flush_size; + char *cl, *start, *end; + + /* + * Hardcode cl-size to 64 - CPUID can't be used here because that might + * cause another #VC exception and the GHCB is not ready to use yet. + */ + flush_size = 64; + start = (char *)(address & PAGE_MASK); + end = start + PAGE_SIZE; + + /* + * First make sure there are no pending writes on the cache-lines to + * flush. + */ + asm volatile("mfence" : : : "memory"); + + for (cl = start; cl != end; cl += flush_size) + clflush(cl); +} + +static int set_clr_page_flags(struct x86_mapping_info *info, + unsigned long address, + pteval_t set, pteval_t clr) +{ + pgd_t *pgdp = (pgd_t *)top_level_pgt; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep, pte; + + /* + * First make sure there is a PMD mapping for 'address'. + * It should already exist, but keep things generic. + * + * To map the page just read from it and fault it in if there is no + * mapping yet. kernel_add_identity_map() can't be called here because + * that would unconditionally map the address on PMD level, destroying + * any PTE-level mappings that might already exist. Use assembly here + * so the access won't be optimized away. + */ + asm volatile("mov %[address], %%r9" + :: [address] "g" (*(unsigned long *)address) + : "r9", "memory"); + + /* + * The page is mapped at least with PMD size - so skip checks and walk + * directly to the PMD. + */ + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); + pmdp = pmd_offset(pudp, address); + + if (pmd_large(*pmdp)) + ptep = split_large_pmd(info, pmdp, address); + else + ptep = pte_offset_kernel(pmdp, address); + + if (!ptep) + return -ENOMEM; + + /* + * Changing encryption attributes of a page requires to flush it from + * the caches. + */ + if ((set | clr) & _PAGE_ENC) { + clflush_page(address); + + /* + * If the encryption attribute is being cleared, change the page state + * to shared in the RMP table. + */ + if (clr) + snp_set_page_shared(__pa(address & PAGE_MASK)); + } + + /* Update PTE */ + pte = *ptep; + pte = pte_set_flags(pte, set); + pte = pte_clear_flags(pte, clr); + set_pte(ptep, pte); + + /* + * If the encryption attribute is being set, then change the page state to + * private in the RMP entry. The page state change must be done after the PTE + * is updated. + */ + if (set & _PAGE_ENC) + snp_set_page_private(__pa(address & PAGE_MASK)); + + /* Flush TLB after changing encryption attribute */ + write_cr3(top_level_pgt); + + return 0; +} + +int set_page_decrypted(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC); +} + +int set_page_encrypted(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0); +} + +int set_page_non_present(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT); +} + +static void do_pf_error(const char *msg, unsigned long error_code, + unsigned long address, unsigned long ip) +{ + error_putstr(msg); + + error_putstr("\nError Code: "); + error_puthex(error_code); + error_putstr("\nCR2: 0x"); + error_puthex(address); + error_putstr("\nRIP relative to _head: 0x"); + error_puthex(ip - (unsigned long)_head); + error_putstr("\n"); + + error("Stopping.\n"); +} + +void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + unsigned long address = native_read_cr2(); + unsigned long end; + bool ghcb_fault; + + ghcb_fault = sev_es_check_ghcb_fault(address); + + address &= PMD_MASK; + end = address + PMD_SIZE; + + /* + * Check for unexpected error codes. Unexpected are: + * - Faults on present pages + * - User faults + * - Reserved bits set + */ + if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD)) + do_pf_error("Unexpected page-fault:", error_code, address, regs->ip); + else if (ghcb_fault) + do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip); + + /* + * Error code is sane - now identity map the 2M region around + * the faulting address. + */ + kernel_add_identity_map(address, end); +} diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c new file mode 100644 index 000000000..3cdf94b41 --- /dev/null +++ b/arch/x86/boot/compressed/idt_64.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include "misc.h" + +static void set_idt_entry(int vector, void (*handler)(void)) +{ + unsigned long address = (unsigned long)handler; + gate_desc entry; + + memset(&entry, 0, sizeof(entry)); + + entry.offset_low = (u16)(address & 0xffff); + entry.segment = __KERNEL_CS; + entry.bits.type = GATE_TRAP; + entry.bits.p = 1; + entry.offset_middle = (u16)((address >> 16) & 0xffff); + entry.offset_high = (u32)(address >> 32); + + memcpy(&boot_idt[vector], &entry, sizeof(entry)); +} + +/* Have this here so we don't need to include */ +static void load_boot_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +/* Setup IDT before kernel jumping to .Lrelocated */ +void load_stage1_idt(void) +{ + boot_idt_desc.address = (unsigned long)boot_idt; + + + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) + set_idt_entry(X86_TRAP_VC, boot_stage1_vc); + + load_boot_idt(&boot_idt_desc); +} + +/* + * Setup IDT after kernel jumping to .Lrelocated. + * + * initialize_identity_maps() needs a #PF handler to be setup + * in order to be able to fault-in identity mapping ranges; see + * do_boot_page_fault(). + * + * This #PF handler setup needs to happen in load_stage2_idt() where the + * IDT is loaded and there the #VC IDT entry gets setup too. + * + * In order to be able to handle #VCs, one needs a GHCB which + * gets setup with an already set up pagetable, which is done in + * initialize_identity_maps(). And there's the catch 22: the boot #VC + * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself + * (and, especially set_page_decrypted()) because the SEV-ES setup code + * cannot initialize a GHCB as there's no #PF handler yet... + */ +void load_stage2_idt(void) +{ + boot_idt_desc.address = (unsigned long)boot_idt; + + set_idt_entry(X86_TRAP_PF, boot_page_fault); + +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Clear the second stage #VC handler in case guest types + * needing #VC have not been detected. + */ + if (sev_status & BIT(1)) + set_idt_entry(X86_TRAP_VC, boot_stage2_vc); + else + set_idt_entry(X86_TRAP_VC, NULL); +#endif + + load_boot_idt(&boot_idt_desc); +} + +void cleanup_exception_handling(void) +{ + /* + * Flush GHCB from cache and map it encrypted again when running as + * SEV-ES guest. + */ + sev_es_shutdown_ghcb(); + + /* Set a null-idt, disabling #PF and #VC handling */ + boot_idt_desc.size = 0; + boot_idt_desc.address = 0; + load_boot_idt(&boot_idt_desc); +} diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S new file mode 100644 index 000000000..22890e199 --- /dev/null +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Early IDT handler entry points + * + * Copyright (C) 2019 SUSE + * + * Author: Joerg Roedel + */ + +#include + +/* For ORIG_RAX */ +#include "../../entry/calling.h" + +.macro EXCEPTION_HANDLER name function error_code=0 +SYM_FUNC_START(\name) + + /* Build pt_regs */ + .if \error_code == 0 + pushq $0 + .endif + + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* Call handler with pt_regs */ + movq %rsp, %rdi + /* Error code is second parameter */ + movq ORIG_RAX(%rsp), %rsi + call \function + + /* Restore regs */ + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + + /* Remove error code and return */ + addq $8, %rsp + + iretq +SYM_FUNC_END(\name) + .endm + + .text + .code64 + +EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 + +#ifdef CONFIG_AMD_MEM_ENCRYPT +EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 +EXCEPTION_HANDLER boot_stage2_vc do_boot_stage2_vc error_code=1 +#endif diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c new file mode 100644 index 000000000..e476bcbd9 --- /dev/null +++ b/arch/x86/boot/compressed/kaslr.c @@ -0,0 +1,873 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kaslr.c + * + * This contains the routines needed to generate a reasonable level of + * entropy to choose a randomized kernel base address offset in support + * of Kernel Address Space Layout Randomization (KASLR). Additionally + * handles walking the physical memory maps (and tracking memory regions + * to avoid) in order to select a physical memory location that can + * contain the entire properly aligned running kernel image. + * + */ + +/* + * isspace() in linux/ctype.h is expected by next_args() to filter + * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, + * since isdigit() is implemented in both of them. Hence disable it + * here. + */ +#define BOOT_CTYPE_H + +#include "misc.h" +#include "error.h" +#include "../string.h" +#include "efi.h" + +#include +#include +#include +#include +#include +#include +#include + +#define _SETUP +#include /* For COMMAND_LINE_SIZE */ +#undef _SETUP + +extern unsigned long get_cmd_line_ptr(void); + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +static unsigned long rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + unsigned long *ptr = (unsigned long *)area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple but unpredictable starting entropy. */ +static unsigned long get_boot_seed(void) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + + return hash; +} + +#define KASLR_COMPRESSED_BOOT +#include "../../lib/kaslr.c" + + +/* Only supporting at most 4 unusable memmap regions with kaslr */ +#define MAX_MEMMAP_REGIONS 4 + +static bool memmap_too_large; + + +/* + * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit. + * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options. + */ +static u64 mem_limit; + +/* Number of immovable memory regions */ +static int num_immovable_mem; + +enum mem_avoid_index { + MEM_AVOID_ZO_RANGE = 0, + MEM_AVOID_INITRD, + MEM_AVOID_CMDLINE, + MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MEMMAP_BEGIN, + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, + MEM_AVOID_MAX, +}; + +static struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} +#include "../../../../lib/ctype.c" +#include "../../../../lib/cmdline.c" + +enum parse_mode { + PARSE_MEMMAP, + PARSE_EFI, +}; + +static int +parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) +{ + char *oldp; + + if (!p) + return -EINVAL; + + /* We don't care about this option here */ + if (!strncmp(p, "exactmap", 8)) + return -EINVAL; + + oldp = p; + *size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + switch (*p) { + case '#': + case '$': + case '!': + *start = memparse(p + 1, &p); + return 0; + case '@': + if (mode == PARSE_MEMMAP) { + /* + * memmap=nn@ss specifies usable region, should + * be skipped + */ + *size = 0; + } else { + u64 flags; + + /* + * efi_fake_mem=nn@ss:attr the attr specifies + * flags that might imply a soft-reservation. + */ + *start = memparse(p + 1, &p); + if (p && *p == ':') { + p++; + if (kstrtoull(p, 0, &flags) < 0) + *size = 0; + else if (flags & EFI_MEMORY_SP) + return 0; + } + *size = 0; + } + fallthrough; + default: + /* + * If w/o offset, only size specified, memmap=nn[KMG] has the + * same behaviour as mem=nn[KMG]. It limits the max address + * system can use. Region above the limit should be avoided. + */ + *start = 0; + return 0; + } + + return -EINVAL; +} + +static void mem_avoid_memmap(enum parse_mode mode, char *str) +{ + static int i; + + if (i >= MAX_MEMMAP_REGIONS) + return; + + while (str && (i < MAX_MEMMAP_REGIONS)) { + int rc; + u64 start, size; + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + rc = parse_memmap(str, &start, &size, mode); + if (rc < 0) + break; + str = k; + + if (start == 0) { + /* Store the specified memory limit if size > 0 */ + if (size > 0 && size < mem_limit) + mem_limit = size; + + continue; + } + + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; + i++; + } + + /* More than 4 memmaps, fail kaslr */ + if ((i >= MAX_MEMMAP_REGIONS) && str) + memmap_too_large = true; +} + +/* Store the number of 1GB huge pages which users specified: */ +static unsigned long max_gb_huge_pages; + +static void parse_gb_huge_pages(char *param, char *val) +{ + static bool gbpage_sz; + char *p; + + if (!strcmp(param, "hugepagesz")) { + p = val; + if (memparse(p, &p) != PUD_SIZE) { + gbpage_sz = false; + return; + } + + if (gbpage_sz) + warn("Repeatedly set hugeTLB page size of 1G!\n"); + gbpage_sz = true; + return; + } + + if (!strcmp(param, "hugepages") && gbpage_sz) { + p = val; + max_gb_huge_pages = simple_strtoull(p, &p, 0); + return; + } +} + +static void handle_mem_options(void) +{ + char *args = (char *)get_cmd_line_ptr(); + size_t len; + char *tmp_cmdline; + char *param, *val; + u64 mem_size; + + if (!args) + return; + + len = strnlen(args, COMMAND_LINE_SIZE-1); + tmp_cmdline = malloc(len + 1); + if (!tmp_cmdline) + error("Failed to allocate space for tmp_cmdline"); + + memcpy(tmp_cmdline, args, len); + tmp_cmdline[len] = 0; + args = tmp_cmdline; + + /* Chew leading spaces */ + args = skip_spaces(args); + + while (*args) { + args = next_arg(args, ¶m, &val); + /* Stop at -- */ + if (!val && strcmp(param, "--") == 0) + break; + + if (!strcmp(param, "memmap")) { + mem_avoid_memmap(PARSE_MEMMAP, val); + } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) { + parse_gb_huge_pages(param, val); + } else if (!strcmp(param, "mem")) { + char *p = val; + + if (!strcmp(p, "nopentium")) + continue; + mem_size = memparse(p, &p); + if (mem_size == 0) + break; + + if (mem_size < mem_limit) + mem_limit = mem_size; + } else if (!strcmp(param, "efi_fake_mem")) { + mem_avoid_memmap(PARSE_EFI, val); + } + } + + free(tmp_cmdline); + return; +} + +/* + * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM) + * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit. + * + * The mem_avoid array is used to store the ranges that need to be avoided + * when KASLR searches for an appropriate random address. We must avoid any + * regions that are unsafe to overlap with during decompression, and other + * things like the initrd, cmdline and boot_params. This comment seeks to + * explain mem_avoid as clearly as possible since incorrect mem_avoid + * memory ranges lead to really hard to debug boot failures. + * + * The initrd, cmdline, and boot_params are trivial to identify for + * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and + * MEM_AVOID_BOOTPARAMS respectively below. + * + * What is not obvious how to avoid is the range of memory that is used + * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover + * the compressed kernel (ZO) and its run space, which is used to extract + * the uncompressed kernel (VO) and relocs. + * + * ZO's full run size sits against the end of the decompression buffer, so + * we can calculate where text, data, bss, etc of ZO are positioned more + * easily. + * + * For additional background, the decompression calculations can be found + * in header.S, and the memory diagram is based on the one found in misc.c. + * + * The following conditions are already enforced by the image layouts and + * associated code: + * - input + input_size >= output + output_size + * - kernel_total_size <= init_size + * - kernel_total_size <= output_size (see Note below) + * - output + init_size >= output + output_size + * + * (Note that kernel_total_size and output_size have no fundamental + * relationship, but output_size is passed to choose_random_location + * as a maximum of the two. The diagram is showing a case where + * kernel_total_size is larger than output_size, but this case is + * handled by bumping output_size.) + * + * The above conditions can be illustrated by a diagram: + * + * 0 output input input+input_size output+init_size + * | | | | | + * | | | | | + * |-----|--------|--------|--------------|-----------|--|-------------| + * | | | + * | | | + * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size + * + * [output, output+init_size) is the entire memory range used for + * extracting the compressed image. + * + * [output, output+kernel_total_size) is the range needed for the + * uncompressed kernel (VO) and its run size (bss, brk, etc). + * + * [output, output+output_size) is VO plus relocs (i.e. the entire + * uncompressed payload contained by ZO). This is the area of the buffer + * written to during decompression. + * + * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case + * range of the copied ZO and decompression code. (i.e. the range + * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) + * + * [input, input+input_size) is the original copied compressed image (ZO) + * (i.e. it does not include its run size). This range must be avoided + * because it contains the data used for decompression. + * + * [input+input_size, output+init_size) is [_text, _end) for ZO. This + * range includes ZO's heap and stack, and must be avoided since it + * performs the decompression. + * + * Since the above two ranges need to be avoided and they are adjacent, + * they can be merged, resulting in: [input, output+init_size) which + * becomes the MEM_AVOID_ZO_RANGE below. + */ +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output) +{ + unsigned long init_size = boot_params->hdr.init_size; + u64 initrd_start, initrd_size; + unsigned long cmd_line, cmd_line_size; + + /* + * Avoid the region that is unsafe to overlap during + * decompression. + */ + mem_avoid[MEM_AVOID_ZO_RANGE].start = input; + mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; + + /* Avoid initrd. */ + initrd_start = (u64)boot_params->ext_ramdisk_image << 32; + initrd_start |= boot_params->hdr.ramdisk_image; + initrd_size = (u64)boot_params->ext_ramdisk_size << 32; + initrd_size |= boot_params->hdr.ramdisk_size; + mem_avoid[MEM_AVOID_INITRD].start = initrd_start; + mem_avoid[MEM_AVOID_INITRD].size = initrd_size; + /* No need to set mapping for initrd, it will be handled in VO. */ + + /* Avoid kernel command line. */ + cmd_line = get_cmd_line_ptr(); + /* Calculate size of cmd_line. */ + if (cmd_line) { + cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1; + mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; + mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; + } + + /* Avoid boot parameters. */ + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + + /* We don't need to set a mapping for setup_data. */ + + /* Mark the memmap regions we need to avoid */ + handle_mem_options(); + + /* Enumerate the immovable memory regions */ + num_immovable_mem = count_immovable_mem_regions(); +} + +/* + * Does this memory vector overlap a known avoided area? If so, record the + * overlap region with the lowest address. + */ +static bool mem_avoid_overlap(struct mem_vector *img, + struct mem_vector *overlap) +{ + int i; + struct setup_data *ptr; + u64 earliest = img->start + img->size; + bool is_overlapping = false; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i]) && + mem_avoid[i].start < earliest) { + *overlap = mem_avoid[i]; + earliest = overlap->start; + is_overlapping = true; + } + } + + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (unsigned long)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { + *overlap = avoid; + earliest = overlap->start; + is_overlapping = true; + } + + if (ptr->type == SETUP_INDIRECT && + ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) { + avoid.start = ((struct setup_indirect *)ptr->data)->addr; + avoid.size = ((struct setup_indirect *)ptr->data)->len; + + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { + *overlap = avoid; + earliest = overlap->start; + is_overlapping = true; + } + } + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + + return is_overlapping; +} + +struct slot_area { + u64 addr; + unsigned long num; +}; + +#define MAX_SLOT_AREA 100 + +static struct slot_area slot_areas[MAX_SLOT_AREA]; +static unsigned int slot_area_index; +static unsigned long slot_max; + +static void store_slot_info(struct mem_vector *region, unsigned long image_size) +{ + struct slot_area slot_area; + + if (slot_area_index == MAX_SLOT_AREA) + return; + + slot_area.addr = region->start; + slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN; + + slot_areas[slot_area_index++] = slot_area; + slot_max += slot_area.num; +} + +/* + * Skip as many 1GB huge pages as possible in the passed region + * according to the number which users specified: + */ +static void +process_gb_huge_pages(struct mem_vector *region, unsigned long image_size) +{ + u64 pud_start, pud_end; + unsigned long gb_huge_pages; + struct mem_vector tmp; + + if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) { + store_slot_info(region, image_size); + return; + } + + /* Are there any 1GB pages in the region? */ + pud_start = ALIGN(region->start, PUD_SIZE); + pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE); + + /* No good 1GB huge pages found: */ + if (pud_start >= pud_end) { + store_slot_info(region, image_size); + return; + } + + /* Check if the head part of the region is usable. */ + if (pud_start >= region->start + image_size) { + tmp.start = region->start; + tmp.size = pud_start - region->start; + store_slot_info(&tmp, image_size); + } + + /* Skip the good 1GB pages. */ + gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT; + if (gb_huge_pages > max_gb_huge_pages) { + pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT); + max_gb_huge_pages = 0; + } else { + max_gb_huge_pages -= gb_huge_pages; + } + + /* Check if the tail part of the region is usable. */ + if (region->start + region->size >= pud_end + image_size) { + tmp.start = pud_end; + tmp.size = region->start + region->size - pud_end; + store_slot_info(&tmp, image_size); + } +} + +static u64 slots_fetch_random(void) +{ + unsigned long slot; + unsigned int i; + + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + slot = kaslr_get_random_long("Physical") % slot_max; + + for (i = 0; i < slot_area_index; i++) { + if (slot >= slot_areas[i].num) { + slot -= slot_areas[i].num; + continue; + } + return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN); + } + + if (i == slot_area_index) + debug_putstr("slots_fetch_random() failed!?\n"); + return 0; +} + +static void __process_mem_region(struct mem_vector *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, overlap; + u64 region_end; + + /* Enforce minimum and memory limit. */ + region.start = max_t(u64, entry->start, minimum); + region_end = min(entry->start + entry->size, mem_limit); + + /* Give up if slot area array is full. */ + while (slot_area_index < MAX_SLOT_AREA) { + /* Potentially raise address to meet alignment needs. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the passed in memory entry? */ + if (region.start > region_end) + return; + + /* Reduce size by any delta from the original address. */ + region.size = region_end - region.start; + + /* Return if region can't contain decompressed kernel */ + if (region.size < image_size) + return; + + /* If nothing overlaps, store the region and return. */ + if (!mem_avoid_overlap(®ion, &overlap)) { + process_gb_huge_pages(®ion, image_size); + return; + } + + /* Store beginning of region if holds at least image_size. */ + if (overlap.start >= region.start + image_size) { + region.size = overlap.start - region.start; + process_gb_huge_pages(®ion, image_size); + } + + /* Clip off the overlapping region and start over. */ + region.start = overlap.start + overlap.size; + } +} + +static bool process_mem_region(struct mem_vector *region, + unsigned long minimum, + unsigned long image_size) +{ + int i; + /* + * If no immovable memory found, or MEMORY_HOTREMOVE disabled, + * use @region directly. + */ + if (!num_immovable_mem) { + __process_mem_region(region, minimum, image_size); + + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n"); + return true; + } + return false; + } + +#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI) + /* + * If immovable memory found, filter the intersection between + * immovable memory and @region. + */ + for (i = 0; i < num_immovable_mem; i++) { + u64 start, end, entry_end, region_end; + struct mem_vector entry; + + if (!mem_overlaps(region, &immovable_mem[i])) + continue; + + start = immovable_mem[i].start; + end = start + immovable_mem[i].size; + region_end = region->start + region->size; + + entry.start = clamp(region->start, start, end); + entry_end = clamp(region_end, start, end); + entry.size = entry_end - entry.start; + + __process_mem_region(&entry, minimum, image_size); + + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n"); + return true; + } + } +#endif + return 0; +} + +#ifdef CONFIG_EFI +/* + * Returns true if we processed the EFI memmap, which we prefer over the E820 + * table if it is available. + */ +static bool +process_efi_entries(unsigned long minimum, unsigned long image_size) +{ + struct efi_info *e = &boot_params->efi_info; + bool efi_mirror_found = false; + struct mem_vector region; + efi_memory_desc_t *md; + unsigned long pmap; + char *signature; + u32 nr_desc; + int i; + + signature = (char *)&e->efi_loader_signature; + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && + strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) + return false; + +#ifdef CONFIG_X86_32 + /* Can't handle data above 4GB at this time */ + if (e->efi_memmap_hi) { + warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n"); + return false; + } + pmap = e->efi_memmap; +#else + pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); +#endif + + nr_desc = e->efi_memmap_size / e->efi_memdesc_size; + for (i = 0; i < nr_desc; i++) { + md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); + if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { + efi_mirror_found = true; + break; + } + } + + for (i = 0; i < nr_desc; i++) { + md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); + + /* + * Here we are more conservative in picking free memory than + * the EFI spec allows: + * + * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also + * free memory and thus available to place the kernel image into, + * but in practice there's firmware where using that memory leads + * to crashes. + * + * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free. + */ + if (md->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + + if (efi_mirror_found && + !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) + continue; + + region.start = md->phys_addr; + region.size = md->num_pages << EFI_PAGE_SHIFT; + if (process_mem_region(®ion, minimum, image_size)) + break; + } + return true; +} +#else +static inline bool +process_efi_entries(unsigned long minimum, unsigned long image_size) +{ + return false; +} +#endif + +static void process_e820_entries(unsigned long minimum, + unsigned long image_size) +{ + int i; + struct mem_vector region; + struct boot_e820_entry *entry; + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < boot_params->e820_entries; i++) { + entry = &boot_params->e820_table[i]; + /* Skip non-RAM entries. */ + if (entry->type != E820_TYPE_RAM) + continue; + region.start = entry->addr; + region.size = entry->size; + if (process_mem_region(®ion, minimum, image_size)) + break; + } +} + +static unsigned long find_random_phys_addr(unsigned long minimum, + unsigned long image_size) +{ + u64 phys_addr; + + /* Bail out early if it's impossible to succeed. */ + if (minimum + image_size > mem_limit) + return 0; + + /* Check if we had too many memmaps. */ + if (memmap_too_large) { + debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n"); + return 0; + } + + if (!process_efi_entries(minimum, image_size)) + process_e820_entries(minimum, image_size); + + phys_addr = slots_fetch_random(); + + /* Perform a final check to make sure the address is in range. */ + if (phys_addr < minimum || phys_addr + image_size > mem_limit) { + warn("Invalid physical address chosen!\n"); + return 0; + } + + return (unsigned long)phys_addr; +} + +static unsigned long find_random_virt_addr(unsigned long minimum, + unsigned long image_size) +{ + unsigned long slots, random_addr; + + /* + * There are how many CONFIG_PHYSICAL_ALIGN-sized slots + * that can hold image_size within the range of minimum to + * KERNEL_IMAGE_SIZE? + */ + slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN; + + random_addr = kaslr_get_random_long("Virtual") % slots; + + return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; +} + +/* + * Since this function examines addresses much more numerically, + * it takes the input and output pointers as 'unsigned long'. + */ +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) +{ + unsigned long random_addr, min_addr; + + if (cmdline_find_option_bool("nokaslr")) { + warn("KASLR disabled: 'nokaslr' on cmdline."); + return; + } + + boot_params->hdr.loadflags |= KASLR_FLAG; + + if (IS_ENABLED(CONFIG_X86_32)) + mem_limit = KERNEL_IMAGE_SIZE; + else + mem_limit = MAXMEM; + + /* Record the various known unsafe memory ranges. */ + mem_avoid_init(input, input_size, *output); + + /* + * Low end of the randomization range should be the + * smaller of 512M or the initial kernel image + * location: + */ + min_addr = min(*output, 512UL << 20); + /* Make sure minimum is aligned. */ + min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN); + + /* Walk available memory entries to find a random address. */ + random_addr = find_random_phys_addr(min_addr, output_size); + if (!random_addr) { + warn("Physical KASLR disabled: no suitable memory region!"); + } else { + /* Update the new physical address location. */ + if (*output != random_addr) + *output = random_addr; + } + + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) + random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); + *virt_addr = random_addr; +} diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S new file mode 100644 index 000000000..f818ee8fb --- /dev/null +++ b/arch/x86/boot/compressed/kernel_info.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + + .section ".rodata.kernel_info", "a" + + .global kernel_info + +kernel_info: + /* Header, Linux top (structure). */ + .ascii "LToP" + /* Size. */ + .long kernel_info_var_len_data - kernel_info + /* Size total. */ + .long kernel_info_end - kernel_info + + /* Maximal allowed type for setup_data and setup_indirect structs. */ + .long SETUP_TYPE_MAX + +kernel_info_var_len_data: + /* Empty for time being... */ +kernel_info_end: diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S new file mode 100644 index 000000000..a73e4d783 --- /dev/null +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#include + +#include +#include +#include + + .text + .code32 +SYM_FUNC_START(get_sev_encryption_bit) + xor %eax, %eax + +#ifdef CONFIG_AMD_MEM_ENCRYPT + push %ebx + push %ecx + push %edx + + movl $0x80000000, %eax /* CPUID to check the highest leaf */ + cpuid + cmpl $0x8000001f, %eax /* See if 0x8000001f is available */ + jb .Lno_sev + + /* + * Check for the SEV feature: + * CPUID Fn8000_001F[EAX] - Bit 1 + * CPUID Fn8000_001F[EBX] - Bits 5:0 + * Pagetable bit position used to indicate encryption + */ + movl $0x8000001f, %eax + cpuid + bt $1, %eax /* Check if SEV is available */ + jnc .Lno_sev + + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + bt $MSR_AMD64_SEV_ENABLED_BIT, %eax /* Check if SEV is active */ + jnc .Lno_sev + + movl %ebx, %eax + andl $0x3f, %eax /* Return the encryption bit location */ + jmp .Lsev_exit + +.Lno_sev: + xor %eax, %eax + +.Lsev_exit: + pop %edx + pop %ecx + pop %ebx + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + + RET +SYM_FUNC_END(get_sev_encryption_bit) + +/** + * sev_es_req_cpuid - Request a CPUID value from the Hypervisor using + * the GHCB MSR protocol + * + * @%eax: Register to request (0=EAX, 1=EBX, 2=ECX, 3=EDX) + * @%edx: CPUID Function + * + * Returns 0 in %eax on success, non-zero on failure + * %edx returns CPUID value on success + */ +SYM_CODE_START_LOCAL(sev_es_req_cpuid) + shll $30, %eax + orl $0x00000004, %eax + movl $MSR_AMD64_SEV_ES_GHCB, %ecx + wrmsr + rep; vmmcall # VMGEXIT + rdmsr + + /* Check response */ + movl %eax, %ecx + andl $0x3ffff000, %ecx # Bits [12-29] MBZ + jnz 2f + + /* Check return code */ + andl $0xfff, %eax + cmpl $5, %eax + jne 2f + + /* All good - return success */ + xorl %eax, %eax +1: + RET +2: + movl $-1, %eax + jmp 1b +SYM_CODE_END(sev_es_req_cpuid) + +SYM_CODE_START(startup32_vc_handler) + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + + /* Keep CPUID function in %ebx */ + movl %eax, %ebx + + /* Check if error-code == SVM_EXIT_CPUID */ + cmpl $0x72, 16(%esp) + jne .Lfail + + movl $0, %eax # Request CPUID[fn].EAX + movl %ebx, %edx # CPUID fn + call sev_es_req_cpuid # Call helper + testl %eax, %eax # Check return code + jnz .Lfail + movl %edx, 12(%esp) # Store result + + movl $1, %eax # Request CPUID[fn].EBX + movl %ebx, %edx # CPUID fn + call sev_es_req_cpuid # Call helper + testl %eax, %eax # Check return code + jnz .Lfail + movl %edx, 8(%esp) # Store result + + movl $2, %eax # Request CPUID[fn].ECX + movl %ebx, %edx # CPUID fn + call sev_es_req_cpuid # Call helper + testl %eax, %eax # Check return code + jnz .Lfail + movl %edx, 4(%esp) # Store result + + movl $3, %eax # Request CPUID[fn].EDX + movl %ebx, %edx # CPUID fn + call sev_es_req_cpuid # Call helper + testl %eax, %eax # Check return code + jnz .Lfail + movl %edx, 0(%esp) # Store result + + /* + * Sanity check CPUID results from the Hypervisor. See comment in + * do_vc_no_ghcb() for more details on why this is necessary. + */ + + /* Fail if SEV leaf not available in CPUID[0x80000000].EAX */ + cmpl $0x80000000, %ebx + jne .Lcheck_sev + cmpl $0x8000001f, 12(%esp) + jb .Lfail + jmp .Ldone + +.Lcheck_sev: + /* Fail if SEV bit not set in CPUID[0x8000001f].EAX[1] */ + cmpl $0x8000001f, %ebx + jne .Ldone + btl $1, 12(%esp) + jnc .Lfail + +.Ldone: + popl %edx + popl %ecx + popl %ebx + popl %eax + + /* Remove error code */ + addl $4, %esp + + /* Jump over CPUID instruction */ + addl $2, (%esp) + + iret +.Lfail: + /* Send terminate request to Hypervisor */ + movl $0x100, %eax + xorl %edx, %edx + movl $MSR_AMD64_SEV_ES_GHCB, %ecx + wrmsr + rep; vmmcall + + /* If request fails, go to hlt loop */ + hlt + jmp .Lfail +SYM_CODE_END(startup32_vc_handler) + + .code64 + +#include "../../kernel/sev_verify_cbit.S" + + .data + +#ifdef CONFIG_AMD_MEM_ENCRYPT + .balign 8 +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) +SYM_DATA(sev_check_data, .quad 0) +#endif diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c new file mode 100644 index 000000000..cf690d871 --- /dev/null +++ b/arch/x86/boot/compressed/misc.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * misc.c + * + * This is a collection of several routines used to extract the kernel + * which includes KASLR relocation, decompression, ELF parsing, and + * relocation processing. Additionally included are the screen and serial + * output functions and related debugging support functions. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * puts by Nick Holloway 1993, better puts by Martin Mares 1995 + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + +#include "misc.h" +#include "error.h" +#include "pgtable.h" +#include "../string.h" +#include "../voffset.h" +#include + +/* + * WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically at + * run time, but no relocation processing is performed. This means that + * it is not safe to place pointers in static structures. + */ + +/* Macros used by the included decompressor code below. */ +#define STATIC static +/* Define an externally visible malloc()/free(). */ +#define MALLOC_VISIBLE +#include + +/* + * Provide definitions of memzero and memmove as some of the decompressors will + * try to define their own functions if these are not defined as macros. + */ +#define memzero(s, n) memset((s), 0, (n)) +#ifndef memmove +#define memmove memmove +/* Functions used by the included decompressor code below. */ +void *memmove(void *dest, const void *src, size_t n); +#endif + +/* + * This is set up by the setup-routine at boot-time + */ +struct boot_params *boot_params; + +struct port_io_ops pio_ops; + +memptr free_mem_ptr; +memptr free_mem_end_ptr; + +static char *vidmem; +static int vidport; + +/* These might be accessed before .bss is cleared, so use .data instead. */ +static int lines __section(".data"); +static int cols __section(".data"); + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif +/* + * NOTE: When adding a new decompressor, please update the analysis in + * ../header.S. + */ + +static void scroll(void) +{ + int i; + + memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) + vidmem[i] = ' '; +} + +#define XMTRDY 0x20 + +#define TXR 0 /* Transmit register (WRITE) */ +#define LSR 5 /* Line Status */ +static void serial_putchar(int ch) +{ + unsigned timeout = 0xffff; + + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + + outb(ch, early_serial_base + TXR); +} + +void __putstr(const char *s) +{ + int x, y, pos; + char c; + + if (early_serial_base) { + const char *str = s; + while (*str) { + if (*str == '\n') + serial_putchar('\r'); + serial_putchar(*str++); + } + } + + if (lines == 0 || cols == 0) + return; + + x = boot_params->screen_info.orig_x; + y = boot_params->screen_info.orig_y; + + while ((c = *s++) != '\0') { + if (c == '\n') { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } else { + vidmem[(x + cols * y) * 2] = c; + if (++x >= cols) { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } + } + } + + boot_params->screen_info.orig_x = x; + boot_params->screen_info.orig_y = y; + + pos = (x + cols * y) * 2; /* Update cursor position */ + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); +} + +void __puthex(unsigned long value) +{ + char alpha[2] = "0"; + int bits; + + for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) { + unsigned long digit = (value >> bits) & 0xf; + + if (digit < 0xA) + alpha[0] = '0' + digit; + else + alpha[0] = 'a' + (digit - 0xA); + + __putstr(alpha); + } +} + +#ifdef CONFIG_X86_NEED_RELOCS +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) +{ + int *reloc; + unsigned long delta, map, ptr; + unsigned long min_addr = (unsigned long)output; + unsigned long max_addr = min_addr + (VO___bss_start - VO__text); + + /* + * Calculate the delta between where vmlinux was linked to load + * and where it was actually loaded. + */ + delta = min_addr - LOAD_PHYSICAL_ADDR; + + /* + * The kernel contains a table of relocation addresses. Those + * addresses have the final load address of the kernel in virtual + * memory. We are currently working in the self map. So we need to + * create an adjustment for kernel memory addresses to the self map. + * This will involve subtracting out the base address of the kernel. + */ + map = delta - __START_KERNEL_map; + + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + + /* + * Process relocations: 32 bit relocations first then 64 bit after. + * Three sets of binary relocations are added to the end of the kernel + * before compression. Each relocation table entry is the kernel + * address of the location which needs to be updated stored as a + * 32-bit value which is sign extended to 64 bits. + * + * Format is: + * + * kernel bits... + * 0 - zero terminator for 64 bit relocations + * 64 bit relocation repeated + * 0 - zero terminator for inverse 32 bit relocations + * 32 bit inverse relocation repeated + * 0 - zero terminator for 32 bit relocations + * 32 bit relocation repeated + * + * So we work backwards from the end of the decompressed image. + */ + for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("32-bit relocation outside of kernel!\n"); + + *(uint32_t *)ptr += delta; + } +#ifdef CONFIG_X86_64 + while (*--reloc) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("inverse 32-bit relocation outside of kernel!\n"); + + *(int32_t *)ptr -= delta; + } + for (reloc--; *reloc; reloc--) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("64-bit relocation outside of kernel!\n"); + + *(uint64_t *)ptr += delta; + } +#endif +} +#else +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) +{ } +#endif + +static void parse_elf(void *output) +{ +#ifdef CONFIG_X86_64 + Elf64_Ehdr ehdr; + Elf64_Phdr *phdrs, *phdr; +#else + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *phdr; +#endif + void *dest; + int i; + + memcpy(&ehdr, output, sizeof(ehdr)); + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || + ehdr.e_ident[EI_MAG3] != ELFMAG3) { + error("Kernel is not a valid ELF file"); + return; + } + + debug_putstr("Parsing ELF... "); + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) + error("Failed to allocate space for phdrs"); + + memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + + for (i = 0; i < ehdr.e_phnum; i++) { + phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif +#ifdef CONFIG_RELOCATABLE + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); +#else + dest = (void *)(phdr->p_paddr); +#endif + memmove(dest, output + phdr->p_offset, phdr->p_filesz); + break; + default: /* Ignore other PT_* */ break; + } + } + + free(phdrs); +} + +/* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel + * image (VO) and the execution environment (.bss, .brk), which makes sure + * there is room to do the in-place decompression. (See header.S for the + * calculations.) + * + * |-----compressed kernel image------| + * V V + * 0 extract_offset +INIT_SIZE + * |-----------|---------------|-------------------------|--------| + * | | | | + * VO__text startup_32 of ZO VO__end ZO__end + * ^ ^ + * |-------uncompressed kernel image---------| + * + */ +asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + unsigned char *input_data, + unsigned long input_len, + unsigned char *output, + unsigned long output_len) +{ + const unsigned long kernel_total_size = VO__end - VO__text; + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long needed_size; + + /* Retain x86 boot parameters pointer passed from startup_32/64. */ + boot_params = rmode; + + /* Clear flags intended for solely in-kernel use. */ + boot_params->hdr.loadflags &= ~KASLR_FLAG; + + sanitize_boot_params(boot_params); + + if (boot_params->screen_info.orig_video_mode == 7) { + vidmem = (char *) 0xb0000; + vidport = 0x3b4; + } else { + vidmem = (char *) 0xb8000; + vidport = 0x3d4; + } + + lines = boot_params->screen_info.orig_video_lines; + cols = boot_params->screen_info.orig_video_cols; + + init_default_io_ops(); + + /* + * Detect TDX guest environment. + * + * It has to be done before console_init() in order to use + * paravirtualized port I/O operations if needed. + */ + early_tdx_detect(); + + console_init(); + + /* + * Save RSDP address for later use. Have this after console_init() + * so that early debugging output from the RSDP parsing code can be + * collected. + */ + boot_params->acpi_rsdp_addr = get_rsdp_addr(); + + debug_putstr("early console in extract_kernel\n"); + + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + * + * On X86_64, the memory is mapped with PMD pages. Round the + * size up so that the full extent of PMD pages mapped is + * included in the check against the valid memory table + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. + */ + needed_size = max(output_len, kernel_total_size); +#ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); +#endif + + /* Report initial kernel position details. */ + debug_putaddr(input_data); + debug_putaddr(input_len); + debug_putaddr(output); + debug_putaddr(output_len); + debug_putaddr(kernel_total_size); + debug_putaddr(needed_size); + +#ifdef CONFIG_X86_64 + /* Report address of 32-bit trampoline */ + debug_putaddr(trampoline_32bit); +#endif + + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + needed_size, + &virt_addr); + + /* Validate memory location choices. */ + if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); +#ifdef CONFIG_X86_64 + if (heap > 0x3fffffffffffUL) + error("Destination address too large"); + if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + error("Destination virtual address is beyond the kernel mapping area"); +#else + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#endif +#ifndef CONFIG_RELOCATABLE + if (virt_addr != LOAD_PHYSICAL_ADDR) + error("Destination virtual address changed when not relocatable"); +#endif + + debug_putstr("\nDecompressing Linux... "); + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); + parse_elf(output); + handle_relocations(output, output_len, virt_addr); + debug_putstr("done.\nBooting the kernel.\n"); + + /* Disable exception handling before booting the kernel */ + cleanup_exception_handling(); + + return output; +} + +void fortify_panic(const char *name) +{ + error("detected buffer overflow"); +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h new file mode 100644 index 000000000..20118fb7c --- /dev/null +++ b/arch/x86/boot/compressed/misc.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_MISC_H +#define BOOT_COMPRESSED_MISC_H + +/* + * Special hack: we have to be careful, because no indirections are allowed here, + * and paravirt_ops is a kind of one. As it will only run in baremetal anyway, + * we just keep it from happening. (This list needs to be extended when new + * paravirt and debugging variants are added.) + */ +#undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_XXL +#undef CONFIG_PARAVIRT_SPINLOCKS +#undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC + +#define __NO_FORTIFY + +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 + +#include +#include +#include +#include +#include +#include +#include + +#include "tdx.h" + +#define BOOT_CTYPE_H +#include + +#define BOOT_BOOT_H +#include "../ctype.h" +#include "../io.h" + +#include "efi.h" + +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr unsigned +#endif + +/* boot/compressed/vmlinux start and end markers */ +extern char _head[], _end[]; + +/* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; +void *malloc(int size); +void free(void *where); +extern struct boot_params *boot_params; +void __putstr(const char *s); +void __puthex(unsigned long value); +#define error_putstr(__x) __putstr(__x) +#define error_puthex(__x) __puthex(__x) + +#ifdef CONFIG_X86_VERBOSE_BOOTUP + +#define debug_putstr(__x) __putstr(__x) +#define debug_puthex(__x) __puthex(__x) +#define debug_putaddr(__x) { \ + debug_putstr(#__x ": 0x"); \ + debug_puthex((unsigned long)(__x)); \ + debug_putstr("\n"); \ + } + +#else + +static inline void debug_putstr(const char *s) +{ } +static inline void debug_puthex(unsigned long value) +{ } +#define debug_putaddr(x) /* */ + +#endif + +/* cmdline.c */ +int cmdline_find_option(const char *option, char *buffer, int bufsize); +int cmdline_find_option_bool(const char *option); + +struct mem_vector { + u64 start; + u64 size; +}; + +#ifdef CONFIG_RANDOMIZE_BASE +/* kaslr.c */ +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr); +#else +static inline void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) +{ +} +#endif + +/* cpuflags.c */ +bool has_cpuflag(int flag); + +#ifdef CONFIG_X86_64 +extern int set_page_decrypted(unsigned long address); +extern int set_page_encrypted(unsigned long address); +extern int set_page_non_present(unsigned long address); +extern unsigned char _pgtable[]; +#endif + +#ifdef CONFIG_EARLY_PRINTK +/* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); +#else +static const int early_serial_base; +static inline void console_init(void) +{ } +#endif + +#ifdef CONFIG_AMD_MEM_ENCRYPT +void sev_enable(struct boot_params *bp); +void snp_check_features(void); +void sev_es_shutdown_ghcb(void); +extern bool sev_es_check_ghcb_fault(unsigned long address); +void snp_set_page_private(unsigned long paddr); +void snp_set_page_shared(unsigned long paddr); +void sev_prep_identity_maps(unsigned long top_level_pgt); +#else +static inline void sev_enable(struct boot_params *bp) +{ + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 unconditionally (thus here in this stub too) to + * ensure that uninitialized values from buggy bootloaders aren't + * propagated. + */ + if (bp) + bp->cc_blob_address = 0; +} +static inline void snp_check_features(void) { } +static inline void sev_es_shutdown_ghcb(void) { } +static inline bool sev_es_check_ghcb_fault(unsigned long address) +{ + return false; +} +static inline void snp_set_page_private(unsigned long paddr) { } +static inline void snp_set_page_shared(unsigned long paddr) { } +static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { } +#endif + +/* acpi.c */ +#ifdef CONFIG_ACPI +acpi_physical_address get_rsdp_addr(void); +#else +static inline acpi_physical_address get_rsdp_addr(void) { return 0; } +#endif + +#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI) +extern struct mem_vector immovable_mem[MAX_NUMNODES*2]; +int count_immovable_mem_regions(void); +#else +static inline int count_immovable_mem_regions(void) { return 0; } +#endif + +/* ident_map_64.c */ +#ifdef CONFIG_X86_5LEVEL +extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; +#endif +extern void kernel_add_identity_map(unsigned long start, unsigned long end); + +/* Used by PAGE_KERN* macros: */ +extern pteval_t __default_kernel_pte_mask; + +/* idt_64.c */ +extern gate_desc boot_idt[BOOT_IDT_ENTRIES]; +extern struct desc_ptr boot_idt_desc; + +#ifdef CONFIG_X86_64 +void cleanup_exception_handling(void); +#else +static inline void cleanup_exception_handling(void) { } +#endif + +/* IDT Entry Points */ +void boot_page_fault(void); +void boot_stage1_vc(void); +void boot_stage2_vc(void); + +unsigned long sev_verify_cbit(unsigned long cr3); + +enum efi_type { + EFI_TYPE_64, + EFI_TYPE_32, + EFI_TYPE_NONE, +}; + +#ifdef CONFIG_EFI +/* helpers for early EFI config table access */ +enum efi_type efi_get_type(struct boot_params *bp); +unsigned long efi_get_system_table(struct boot_params *bp); +int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len); +unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid); +#else +static inline enum efi_type efi_get_type(struct boot_params *bp) +{ + return EFI_TYPE_NONE; +} + +static inline unsigned long efi_get_system_table(struct boot_params *bp) +{ + return 0; +} + +static inline int efi_get_conf_table(struct boot_params *bp, + unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len) +{ + return -ENOENT; +} + +static inline unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid) +{ + return 0; +} +#endif /* CONFIG_EFI */ + +#endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 000000000..52aa56cdb --- /dev/null +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * H. Peter Anvin + * + * ----------------------------------------------------------------------- + * + * Outputs a small assembly wrapper with the appropriate symbols defined. + */ + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + uint32_t olen; + long ilen; + FILE *f = NULL; + int retval = 1; + + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); + goto bail; + } + + /* Get the information for the compressed kernel image first */ + + f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + goto bail; + } + + + if (fseek(f, -4L, SEEK_END)) { + perror(argv[1]); + } + + if (fread(&olen, sizeof(olen), 1, f) != 1) { + perror(argv[1]); + goto bail; + } + + ilen = ftell(f); + olen = get_unaligned_le32(&olen); + + printf(".section \".rodata..compressed\",\"a\",@progbits\n"); + printf(".globl z_input_len\n"); + printf("z_input_len = %lu\n", ilen); + printf(".globl z_output_len\n"); + printf("z_output_len = %lu\n", (unsigned long)olen); + + printf(".globl input_data, input_data_end\n"); + printf("input_data:\n"); + printf(".incbin \"%s\"\n", argv[1]); + printf("input_data_end:\n"); + + printf(".section \".rodata\",\"a\",@progbits\n"); + printf(".globl input_len\n"); + printf("input_len:\n\t.long %lu\n", ilen); + printf(".globl output_len\n"); + printf("output_len:\n\t.long %lu\n", (unsigned long)olen); + + retval = 0; +bail: + if (f) + fclose(f); + return retval; +} diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h new file mode 100644 index 000000000..cc9b2529a --- /dev/null +++ b/arch/x86/boot/compressed/pgtable.h @@ -0,0 +1,20 @@ +#ifndef BOOT_COMPRESSED_PAGETABLE_H +#define BOOT_COMPRESSED_PAGETABLE_H + +#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) + +#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 + +#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 + +#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE + +#ifndef __ASSEMBLER__ + +extern unsigned long *trampoline_32bit; + +extern void trampoline_32bit_src(void *return_ptr); + +#endif /* __ASSEMBLER__ */ +#endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c new file mode 100644 index 000000000..2ac12ff41 --- /dev/null +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "misc.h" +#include +#include +#include "pgtable.h" +#include "../string.h" +#include "efi.h" + +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +#ifdef CONFIG_X86_5LEVEL +/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */ +unsigned int __section(".data") __pgtable_l5_enabled; +unsigned int __section(".data") pgdir_shift = 39; +unsigned int __section(".data") ptrs_per_p4d = 1; +#endif + +struct paging_config { + unsigned long trampoline_start; + unsigned long l5_required; +}; + +/* Buffer to preserve trampoline memory */ +static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; + +/* + * Trampoline address will be printed by extract_kernel() for debugging + * purposes. + * + * Avoid putting the pointer into .bss as it will be cleared between + * paging_prepare() and extract_kernel(). + */ +unsigned long *trampoline_32bit __section(".data"); + +extern struct boot_params *boot_params; +int cmdline_find_option_bool(const char *option); + +static unsigned long find_trampoline_placement(void) +{ + unsigned long bios_start = 0, ebda_start = 0; + struct boot_e820_entry *entry; + char *signature; + int i; + + /* + * Find a suitable spot for the trampoline. + * This code is based on reserve_bios_regions(). + */ + + /* + * EFI systems may not provide legacy ROM. The memory may not be mapped + * at all. + * + * Only look for values in the legacy ROM for non-EFI system. + */ + signature = (char *)&boot_params->efi_info.efi_loader_signature; + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && + strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; + } + + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; + + if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; + + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Find the first usable memory region under bios_start. */ + for (i = boot_params->e820_entries - 1; i >= 0; i--) { + unsigned long new = bios_start; + + entry = &boot_params->e820_table[i]; + + /* Skip all entries above bios_start. */ + if (bios_start <= entry->addr) + continue; + + /* Skip non-RAM entries. */ + if (entry->type != E820_TYPE_RAM) + continue; + + /* Adjust bios_start to the end of the entry if needed. */ + if (bios_start > entry->addr + entry->size) + new = entry->addr + entry->size; + + /* Keep bios_start page-aligned. */ + new = round_down(new, PAGE_SIZE); + + /* Skip the entry if it's too small. */ + if (new - TRAMPOLINE_32BIT_SIZE < entry->addr) + continue; + + /* Protect against underflow. */ + if (new - TRAMPOLINE_32BIT_SIZE > bios_start) + break; + + bios_start = new; + break; + } + + /* Place the trampoline just below the end of low memory */ + return bios_start - TRAMPOLINE_32BIT_SIZE; +} + +struct paging_config paging_prepare(void *rmode) +{ + struct paging_config paging_config = {}; + + /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + boot_params = rmode; + + /* + * Check if LA57 is desired and supported. + * + * There are several parts to the check: + * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y + * - if user asked to disable 5-level paging: no5lvl in cmdline + * - if the machine supports 5-level paging: + * + CPUID leaf 7 is supported + * + the leaf has the feature bit set + * + * That's substitute for boot_cpu_has() in early boot code. + */ + if (IS_ENABLED(CONFIG_X86_5LEVEL) && + !cmdline_find_option_bool("no5lvl") && + native_cpuid_eax(0) >= 7 && + (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { + paging_config.l5_required = 1; + } + + paging_config.trampoline_start = find_trampoline_placement(); + + trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + + /* Preserve trampoline memory */ + memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); + + /* Clear trampoline memory first */ + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); + + /* Copy trampoline code in place */ + memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + + /* + * The code below prepares page table in trampoline memory. + * + * The new page table will be used by trampoline code for switching + * from 4- to 5-level paging or vice versa. + * + * If switching is not required, the page table is unused: trampoline + * code wouldn't touch CR3. + */ + + /* + * We are not going to use the page table in trampoline memory if we + * are already in the desired paging mode. + */ + if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + goto out; + + if (paging_config.l5_required) { + /* + * For 4- to 5-level paging transition, set up current CR3 as + * the first and the only entry in a new top-level page table. + */ + trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + } else { + unsigned long src; + + /* + * For 5- to 4-level paging transition, copy page table pointed + * by first entry in the current top-level page table as our + * new top-level page table. + * + * We cannot just point to the page table from trampoline as it + * may be above 4G. + */ + src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; + memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), + (void *)src, PAGE_SIZE); + } + +out: + return paging_config; +} + +void cleanup_trampoline(void *pgtable) +{ + void *trampoline_pgtable; + + trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + + /* + * Move the top level page table out of trampoline memory, + * if it's there. + */ + if ((void *)__native_read_cr3() == trampoline_pgtable) { + memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); + } + + /* Restore trampoline memory */ + memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); + + /* Initialize variables for 5-level paging */ +#ifdef CONFIG_X86_5LEVEL + if (__read_cr4() & X86_CR4_LA57) { + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; + } +#endif +} diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c new file mode 100644 index 000000000..9c91cc40f --- /dev/null +++ b/arch/x86/boot/compressed/sev.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +/* + * misc.h needs to be first because it knows how to include the other kernel + * headers in the pre-decompression code in a way that does not break + * compilation. + */ +#include "misc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "error.h" +#include "../msr.h" + +struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); +struct ghcb *boot_ghcb; + +/* + * Copy a version of this function here - insn-eval.c can't be used in + * pre-decompression code. + */ +static bool insn_has_rep_prefix(struct insn *insn) +{ + insn_byte_t p; + int i; + + insn_get_prefixes(insn); + + for_each_insn_prefix(insn, i, p) { + if (p == 0xf2 || p == 0xf3) + return true; + } + + return false; +} + +/* + * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and + * doesn't use segments. + */ +static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) +{ + return 0UL; +} + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + struct msr m; + + boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); + + return m.q; +} + +static inline void sev_es_wr_ghcb_msr(u64 val) +{ + struct msr m; + + m.q = val; + boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); +} + +static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) +{ + char buffer[MAX_INSN_SIZE]; + int ret; + + memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); + + ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); + if (ret < 0) + return ES_DECODE_FAILED; + + return ES_OK; +} + +static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, + void *dst, char *buf, size_t size) +{ + memcpy(dst, buf, size); + + return ES_OK; +} + +static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, + void *src, char *buf, size_t size) +{ + memcpy(buf, src, size); + + return ES_OK; +} + +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + +static bool fault_in_kernel_space(unsigned long address) +{ + return false; +} + +#undef __init +#undef __pa +#define __init +#define __pa(x) ((unsigned long)(x)) + +#define __BOOT_COMPRESSED + +/* Basic instruction decoding support needed */ +#include "../../lib/inat.c" +#include "../../lib/insn.c" + +/* Include code for early handlers */ +#include "../../kernel/sev-shared.c" + +static inline bool sev_snp_enabled(void) +{ + return sev_status & MSR_AMD64_SEV_SNP_ENABLED; +} + +static void __page_state_change(unsigned long paddr, enum psc_op op) +{ + u64 val; + + if (!sev_snp_enabled()) + return; + + /* + * If private -> shared then invalidate the page before requesting the + * state change in the RMP table. + */ + if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + + /* Issue VMGEXIT to change the page state in RMP table. */ + sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); + VMGEXIT(); + + /* Read the response of the VMGEXIT. */ + val = sev_es_rd_ghcb_msr(); + if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + /* + * Now that page state is changed in the RMP table, validate it so that it is + * consistent with the RMP entry. + */ + if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); +} + +void snp_set_page_private(unsigned long paddr) +{ + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); +} + +void snp_set_page_shared(unsigned long paddr) +{ + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); +} + +static bool early_setup_ghcb(void) +{ + if (set_page_decrypted((unsigned long)&boot_ghcb_page)) + return false; + + /* Page is now mapped decrypted, clear it */ + memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page)); + + boot_ghcb = &boot_ghcb_page; + + /* Initialize lookup tables for the instruction decoder */ + inat_init_tables(); + + /* SNP guest requires the GHCB GPA must be registered */ + if (sev_snp_enabled()) + snp_register_ghcb_early(__pa(&boot_ghcb_page)); + + return true; +} + +void sev_es_shutdown_ghcb(void) +{ + if (!boot_ghcb) + return; + + if (!sev_es_check_cpu_features()) + error("SEV-ES CPU Features missing."); + + /* + * GHCB Page must be flushed from the cache and mapped encrypted again. + * Otherwise the running kernel will see strange cache effects when + * trying to use that page. + */ + if (set_page_encrypted((unsigned long)&boot_ghcb_page)) + error("Can't map GHCB page encrypted"); + + /* + * GHCB page is mapped encrypted again and flushed from the cache. + * Mark it non-present now to catch bugs when #VC exceptions trigger + * after this point. + */ + if (set_page_non_present((unsigned long)&boot_ghcb_page)) + error("Can't unmap GHCB page"); +} + +static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set, + unsigned int reason, u64 exit_info_2) +{ + u64 exit_info_1 = SVM_VMGEXIT_TERM_REASON(set, reason); + + vc_ghcb_invalidate(ghcb); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + +bool sev_es_check_ghcb_fault(unsigned long address) +{ + /* Check whether the fault was on the GHCB page */ + return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page); +} + +void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) +{ + struct es_em_ctxt ctxt; + enum es_result result; + + if (!boot_ghcb && !early_setup_ghcb()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + + vc_ghcb_invalidate(boot_ghcb); + result = vc_init_em_ctxt(&ctxt, regs, exit_code); + if (result != ES_OK) + goto finish; + + switch (exit_code) { + case SVM_EXIT_RDTSC: + case SVM_EXIT_RDTSCP: + result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code); + break; + case SVM_EXIT_IOIO: + result = vc_handle_ioio(boot_ghcb, &ctxt); + break; + case SVM_EXIT_CPUID: + result = vc_handle_cpuid(boot_ghcb, &ctxt); + break; + default: + result = ES_UNSUPPORTED; + break; + } + +finish: + if (result == ES_OK) + vc_finish_insn(&ctxt); + else if (result != ES_RETRY) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); +} + +static void enforce_vmpl0(void) +{ + u64 attrs; + int err; + + /* + * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically + * higher) privilege level. Here, clear the VMPL1 permission mask of the + * GHCB page. If the guest is not running at VMPL0, this will fail. + * + * If the guest is running at VMPL0, it will succeed. Even if that operation + * modifies permission bits, it is still ok to do so currently because Linux + * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks + * changing is a don't-care. + */ + attrs = 1; + if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); +} + +/* + * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need + * guest side implementation for proper functioning of the guest. If any + * of these features are enabled in the hypervisor but are lacking guest + * side implementation, the behavior of the guest will be undefined. The + * guest could fail in non-obvious way making it difficult to debug. + * + * As the behavior of reserved feature bits is unknown to be on the + * safe side add them to the required features mask. + */ +#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \ + MSR_AMD64_SNP_REFLECT_VC | \ + MSR_AMD64_SNP_RESTRICTED_INJ | \ + MSR_AMD64_SNP_ALT_INJ | \ + MSR_AMD64_SNP_DEBUG_SWAP | \ + MSR_AMD64_SNP_VMPL_SSS | \ + MSR_AMD64_SNP_SECURE_TSC | \ + MSR_AMD64_SNP_VMGEXIT_PARAM | \ + MSR_AMD64_SNP_VMSA_REG_PROTECTION | \ + MSR_AMD64_SNP_RESERVED_BIT13 | \ + MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_RESERVED_MASK) + +/* + * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented + * by the guest kernel. As and when a new feature is implemented in the + * guest kernel, a corresponding bit should be added to the mask. + */ +#define SNP_FEATURES_PRESENT (0) + +void snp_check_features(void) +{ + u64 unsupported; + + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* + * Terminate the boot if hypervisor has enabled any feature lacking + * guest side implementation. Pass on the unsupported features mask through + * EXIT_INFO_2 of the GHCB protocol so that those features can be reported + * as part of the guest boot failure. + */ + unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + if (unsupported) { + if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN, + GHCB_SNP_UNSUPPORTED, unsupported); + } +} + +void sev_enable(struct boot_params *bp) +{ + unsigned int eax, ebx, ecx, edx; + struct msr m; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) + return; + + /* + * Setup/preliminary detection of SNP. This will be sanity-checked + * against CPUID/MSR values later. + */ + snp = snp_init(bp); + + /* Now repeat the checks with the SNP CPUID table. */ + + /* Recheck the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + + /* + * Recheck for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) { + if (snp) + error("SEV-SNP support indicated by CC blob, but not CPUID."); + return; + } + + /* Set the SME mask if this is an SEV guest. */ + boot_rdmsr(MSR_AMD64_SEV, &m); + sev_status = m.q; + if (!(sev_status & MSR_AMD64_SEV_ENABLED)) + return; + + /* Negotiate the GHCB protocol version. */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) { + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED); + } + + /* + * SNP is supported in v2 of the GHCB spec which mandates support for HV + * features. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + if (!(get_hv_features() & GHCB_HV_FT_SNP)) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + enforce_vmpl0(); + } + + if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); + + sme_me_mask = BIT_ULL(ebx & 0x3f); +} + +/* Search for Confidential Computing blob in the EFI config table. */ +static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) +{ + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + int ret; + + ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); + if (ret) + return NULL; + + return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, + cfg_table_len, + EFI_CC_BLOB_GUID); +} + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the boot kernel + * by firmware/bootloader in the following ways: + * + * - via an entry in the EFI config table + * - via a setup_data structure, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. + */ +static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + cc_info = find_cc_blob_efi(bp); + if (cc_info) + goto found_cc_info; + + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + return cc_info; +} + +/* + * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks + * will verify the SNP CPUID/MSR bits. + */ +bool snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * If a SNP-specific Confidential Computing blob is present, then + * firmware/bootloader have indicated SNP support. Verifying this + * involves CPUID checks which will be more reliable if the SNP + * CPUID table is used. See comments over snp_setup_cpuid_table() for + * more details. + */ + setup_cpuid_table(cc_info); + + /* + * Pass run-time kernel a pointer to CC info via boot_params so EFI + * config table doesn't need to be searched again during early startup + * phase. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} + +void sev_prep_identity_maps(unsigned long top_level_pgt) +{ + /* + * The Confidential Computing blob is used very early in uncompressed + * kernel to find the in-memory CPUID table to handle CPUID + * instructions. Make sure an identity-mapping exists so it can be + * accessed after switchover. + */ + if (sev_snp_enabled()) { + unsigned long cc_info_pa = boot_params->cc_blob_address; + struct cc_blob_sev_info *cc_info; + + kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); + + cc_info = (struct cc_blob_sev_info *)cc_info_pa; + kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len); + } + + sev_verify_cbit(top_level_pgt); +} diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c new file mode 100644 index 000000000..81fc1eaa3 --- /dev/null +++ b/arch/x86/boot/compressed/string.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This provides an optimized implementation of memcpy, and a simplified + * implementation of memset and memmove. These are used here because the + * standard kernel runtime versions are not yet available and we don't + * trust the gcc built-in implementations as they may do unexpected things + * (e.g. FPU ops) in the minimal decompression stub execution environment. + */ +#include "error.h" + +#include "../string.c" + +#ifdef CONFIG_X86_32 +static void *____memcpy(void *dest, const void *src, size_t n) +{ + int d0, d1, d2; + asm volatile( + "rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#else +static void *____memcpy(void *dest, const void *src, size_t n) +{ + long d0, d1, d2; + asm volatile( + "rep ; movsq\n\t" + "movq %4,%%rcx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#endif + +void *memset(void *s, int c, size_t n) +{ + int i; + char *ss = s; + + for (i = 0; i < n; i++) + ss[i] = c; + return s; +} + +void *memmove(void *dest, const void *src, size_t n) +{ + unsigned char *d = dest; + const unsigned char *s = src; + + if (d <= s || d - s >= n) + return ____memcpy(dest, src, n); + + while (n-- > 0) + d[n] = s[n]; + + return dest; +} + +/* Detect and warn about potential overlaps, but handle them with memmove. */ +void *memcpy(void *dest, const void *src, size_t n) +{ + if (dest > src && dest - src < n) { + warn("Avoiding potentially unsafe overlapping memcpy()!"); + return memmove(dest, src, n); + } + return ____memcpy(dest, src, n); +} + +#ifdef CONFIG_KASAN +extern void *__memset(void *s, int c, size_t n) __alias(memset); +extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove); +extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy); +#endif diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S new file mode 100644 index 000000000..46d0495e0 --- /dev/null +++ b/arch/x86/boot/compressed/tdcall.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "../../coco/tdx/tdcall.S" diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c new file mode 100644 index 000000000..918a7606f --- /dev/null +++ b/arch/x86/boot/compressed/tdx.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../cpuflags.h" +#include "../string.h" +#include "../io.h" +#include "error.h" + +#include +#include + +#include + +/* Called from __tdx_hypercall() for unrecoverable failure */ +void __tdx_hypercall_failed(void) +{ + error("TDVMCALL failed. TDX module bug?"); +} + +static inline unsigned int tdx_io_in(int size, u16 port) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_IO_INSTRUCTION, + .r12 = size, + .r13 = 0, + .r14 = port, + }; + + if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + return UINT_MAX; + + return args.r11; +} + +static inline void tdx_io_out(int size, u16 port, u32 value) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_IO_INSTRUCTION, + .r12 = size, + .r13 = 1, + .r14 = port, + .r15 = value, + }; + + __tdx_hypercall(&args, 0); +} + +static inline u8 tdx_inb(u16 port) +{ + return tdx_io_in(1, port); +} + +static inline void tdx_outb(u8 value, u16 port) +{ + tdx_io_out(1, port, value); +} + +static inline void tdx_outw(u16 value, u16 port) +{ + tdx_io_out(2, port, value); +} + +void early_tdx_detect(void) +{ + u32 eax, sig[3]; + + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]); + + if (memcmp(TDX_IDENT, sig, sizeof(sig))) + return; + + /* Use hypercalls instead of I/O instructions */ + pio_ops.f_inb = tdx_inb; + pio_ops.f_outb = tdx_outb; + pio_ops.f_outw = tdx_outw; +} diff --git a/arch/x86/boot/compressed/tdx.h b/arch/x86/boot/compressed/tdx.h new file mode 100644 index 000000000..9055482cd --- /dev/null +++ b/arch/x86/boot/compressed/tdx.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_TDX_H +#define BOOT_COMPRESSED_TDX_H + +#include + +#ifdef CONFIG_INTEL_TDX_GUEST +void early_tdx_detect(void); +#else +static inline void early_tdx_detect(void) { }; +#endif + +#endif /* BOOT_COMPRESSED_TDX_H */ diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S new file mode 100644 index 000000000..112b2375d --- /dev/null +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) + +#undef i386 + +#include +#include + +#ifdef CONFIG_X86_64 +OUTPUT_ARCH(i386:x86-64) +ENTRY(startup_64) +#else +OUTPUT_ARCH(i386) +ENTRY(startup_32) +#endif + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .rodata..compressed : { + *(.rodata..compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + *(.bss.efistub) + _edata = . ; + } + . = ALIGN(L1_CACHE_BYTES); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + .pgtable : { + _pgtable = . ; + *(.pgtable) + _epgtable = . ; + } +#endif + . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */ + _end = .; + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS + /DISCARD/ : { + *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss) + *(.hash) *(.gnu.hash) + *(.note.*) + } + + .got.plt (INFO) : { + *(.got.plt) + } + ASSERT(SIZEOF(.got.plt) == 0 || +#ifdef CONFIG_X86_64 + SIZEOF(.got.plt) == 0x18, +#else + SIZEOF(.got.plt) == 0xc, +#endif + "Unexpected GOT/PLT entries detected!") + + /* + * Sections that should stay zero sized, which is safer to + * explicitly check instead of blindly discarding. + */ + .got : { + *(.got) + } + ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!") + + .plt : { + *(.plt) *(.plt.*) + } + ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") + + .rel.dyn : { + *(.rel.*) *(.rel_*) + } + ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!") + + .rela.dyn : { + *(.rela.*) *(.rela_*) + } + ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") +} diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S new file mode 100644 index 000000000..6afd05e81 --- /dev/null +++ b/arch/x86/boot/copy.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +#include + +/* + * Memory copy routines + */ + + .code16 + .text + +SYM_FUNC_START_NOALIGN(memcpy) + pushw %si + pushw %di + movw %ax, %di + movw %dx, %si + pushw %cx + shrw $2, %cx + rep; movsl + popw %cx + andw $3, %cx + rep; movsb + popw %di + popw %si + retl +SYM_FUNC_END(memcpy) + +SYM_FUNC_START_NOALIGN(memset) + pushw %di + movw %ax, %di + movzbl %dl, %eax + imull $0x01010101,%eax + pushw %cx + shrw $2, %cx + rep; stosl + popw %cx + andw $3, %cx + rep; stosb + popw %di + retl +SYM_FUNC_END(memset) + +SYM_FUNC_START_NOALIGN(copy_from_fs) + pushw %ds + pushw %fs + popw %ds + calll memcpy + popw %ds + retl +SYM_FUNC_END(copy_from_fs) + +SYM_FUNC_START_NOALIGN(copy_to_fs) + pushw %es + pushw %fs + popw %es + calll memcpy + popw %es + retl +SYM_FUNC_END(copy_to_fs) diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c new file mode 100644 index 000000000..0bbf4f370 --- /dev/null +++ b/arch/x86/boot/cpu.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * arch/x86/boot/cpu.c + * + * Check for obligatory CPU features and abort if the features are not + * present. + */ + +#include "boot.h" +#ifdef CONFIG_X86_FEATURE_NAMES +#include "cpustr.h" +#endif + +static char *cpu_name(int level) +{ + static char buf[6]; + + if (level == 64) { + return "x86-64"; + } else { + if (level == 15) + level = 6; + sprintf(buf, "i%d86", level); + return buf; + } +} + +static void show_cap_strs(u32 *err_flags) +{ + int i, j; +#ifdef CONFIG_X86_FEATURE_NAMES + const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs; + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + for (j = 0; j < 32; j++) { + if (msg_strs[0] < i || + (msg_strs[0] == i && msg_strs[1] < j)) { + /* Skip to the next string */ + msg_strs += 2; + while (*msg_strs++) + ; + } + if (e & 1) { + if (msg_strs[0] == i && + msg_strs[1] == j && + msg_strs[2]) + printf("%s ", msg_strs+2); + else + printf("%d:%d ", i, j); + } + e >>= 1; + } + } +#else + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + for (j = 0; j < 32; j++) { + if (e & 1) + printf("%d:%d ", i, j); + e >>= 1; + } + } +#endif +} + +int validate_cpu(void) +{ + u32 *err_flags; + int cpu_level, req_level; + + check_cpu(&cpu_level, &req_level, &err_flags); + + if (cpu_level < req_level) { + printf("This kernel requires an %s CPU, ", + cpu_name(req_level)); + printf("but only detected an %s CPU.\n", + cpu_name(cpu_level)); + return -1; + } + + if (err_flags) { + puts("This kernel requires the following features " + "not present on the CPU:\n"); + show_cap_strs(err_flags); + putchar('\n'); + return -1; + } else if (check_knl_erratum()) { + return -1; + } else { + return 0; + } +} diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c new file mode 100644 index 000000000..fed8d13ce --- /dev/null +++ b/arch/x86/boot/cpucheck.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Check for obligatory CPU features and abort if the features are not + * present. This code should be compilable as 16-, 32- or 64-bit + * code, so be very careful with types and inline assembly. + * + * This code should not contain any messages; that requires an + * additional wrapper. + * + * As written, this code is not safe for inclusion into the kernel + * proper (after FPU initialization, in particular). + */ + +#ifdef _SETUP +# include "boot.h" +#endif +#include +#include +#include +#include +#include +#include "string.h" +#include "msr.h" + +static u32 err_flags[NCAPINTS]; + +static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; + +static const u32 req_flags[NCAPINTS] = +{ + REQUIRED_MASK0, + REQUIRED_MASK1, + 0, /* REQUIRED_MASK2 not implemented in this file */ + 0, /* REQUIRED_MASK3 not implemented in this file */ + REQUIRED_MASK4, + 0, /* REQUIRED_MASK5 not implemented in this file */ + REQUIRED_MASK6, + 0, /* REQUIRED_MASK7 not implemented in this file */ + 0, /* REQUIRED_MASK8 not implemented in this file */ + 0, /* REQUIRED_MASK9 not implemented in this file */ + 0, /* REQUIRED_MASK10 not implemented in this file */ + 0, /* REQUIRED_MASK11 not implemented in this file */ + 0, /* REQUIRED_MASK12 not implemented in this file */ + 0, /* REQUIRED_MASK13 not implemented in this file */ + 0, /* REQUIRED_MASK14 not implemented in this file */ + 0, /* REQUIRED_MASK15 not implemented in this file */ + REQUIRED_MASK16, +}; + +#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) + +static int is_amd(void) +{ + return cpu_vendor[0] == A32('A', 'u', 't', 'h') && + cpu_vendor[1] == A32('e', 'n', 't', 'i') && + cpu_vendor[2] == A32('c', 'A', 'M', 'D'); +} + +static int is_centaur(void) +{ + return cpu_vendor[0] == A32('C', 'e', 'n', 't') && + cpu_vendor[1] == A32('a', 'u', 'r', 'H') && + cpu_vendor[2] == A32('a', 'u', 'l', 's'); +} + +static int is_transmeta(void) +{ + return cpu_vendor[0] == A32('G', 'e', 'n', 'u') && + cpu_vendor[1] == A32('i', 'n', 'e', 'T') && + cpu_vendor[2] == A32('M', 'x', '8', '6'); +} + +static int is_intel(void) +{ + return cpu_vendor[0] == A32('G', 'e', 'n', 'u') && + cpu_vendor[1] == A32('i', 'n', 'e', 'I') && + cpu_vendor[2] == A32('n', 't', 'e', 'l'); +} + +/* Returns a bitmask of which words we have error bits in */ +static int check_cpuflags(void) +{ + u32 err; + int i; + + err = 0; + for (i = 0; i < NCAPINTS; i++) { + err_flags[i] = req_flags[i] & ~cpu.flags[i]; + if (err_flags[i]) + err |= 1 << i; + } + + return err; +} + +/* + * Returns -1 on error. + * + * *cpu_level is set to the current CPU level; *req_level to the required + * level. x86-64 is considered level 64 for this purpose. + * + * *err_flags_ptr is set to the flags error array if there are flags missing. + */ +int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) +{ + int err; + + memset(&cpu.flags, 0, sizeof(cpu.flags)); + cpu.level = 3; + + if (has_eflag(X86_EFLAGS_AC)) + cpu.level = 4; + + get_cpuflags(); + err = check_cpuflags(); + + if (test_bit(X86_FEATURE_LM, cpu.flags)) + cpu.level = 64; + + if (err == 0x01 && + !(err_flags[0] & + ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) && + is_amd()) { + /* If this is an AMD and we're only missing SSE+SSE2, try to + turn them on */ + + struct msr m; + + boot_rdmsr(MSR_K7_HWCR, &m); + m.l &= ~(1 << 15); + boot_wrmsr(MSR_K7_HWCR, &m); + + get_cpuflags(); /* Make sure it really did something */ + err = check_cpuflags(); + } else if (err == 0x01 && + !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && + is_centaur() && cpu.model >= 6) { + /* If this is a VIA C3, we might have to enable CX8 + explicitly */ + + struct msr m; + + boot_rdmsr(MSR_VIA_FCR, &m); + m.l |= (1 << 1) | (1 << 7); + boot_wrmsr(MSR_VIA_FCR, &m); + + set_bit(X86_FEATURE_CX8, cpu.flags); + err = check_cpuflags(); + } else if (err == 0x01 && is_transmeta()) { + /* Transmeta might have masked feature bits in word 0 */ + + struct msr m, m_tmp; + u32 level = 1; + + boot_rdmsr(0x80860004, &m); + m_tmp = m; + m_tmp.l = ~0; + boot_wrmsr(0x80860004, &m_tmp); + asm("cpuid" + : "+a" (level), "=d" (cpu.flags[0]) + : : "ecx", "ebx"); + boot_wrmsr(0x80860004, &m); + + err = check_cpuflags(); + } else if (err == 0x01 && + !(err_flags[0] & ~(1 << X86_FEATURE_PAE)) && + is_intel() && cpu.level == 6 && + (cpu.model == 9 || cpu.model == 13)) { + /* PAE is disabled on this Pentium M but can be forced */ + if (cmdline_find_option_bool("forcepae")) { + puts("WARNING: Forcing PAE in CPU flags\n"); + set_bit(X86_FEATURE_PAE, cpu.flags); + err = check_cpuflags(); + } + else { + puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); + } + } + if (!err) + err = check_knl_erratum(); + + if (err_flags_ptr) + *err_flags_ptr = err ? err_flags : NULL; + if (cpu_level_ptr) + *cpu_level_ptr = cpu.level; + if (req_level_ptr) + *req_level_ptr = req_level; + + return (cpu.level < req_level || err) ? -1 : 0; +} + +int check_knl_erratum(void) +{ + /* + * First check for the affected model/family: + */ + if (!is_intel() || + cpu.family != 6 || + cpu.model != INTEL_FAM6_XEON_PHI_KNL) + return 0; + + /* + * This erratum affects the Accessed/Dirty bits, and can + * cause stray bits to be set in !Present PTEs. We have + * enough bits in our 64-bit PTEs (which we have on real + * 64-bit mode or PAE) to avoid using these troublesome + * bits. But, we do not have enough space in our 32-bit + * PTEs. So, refuse to run on 32-bit non-PAE kernels. + */ + if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE)) + return 0; + + puts("This 32-bit kernel can not run on this Xeon Phi x200\n" + "processor due to a processor erratum. Use a 64-bit\n" + "kernel, or enable PAE in this 32-bit kernel.\n\n"); + + return -1; +} + + diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 000000000..a83d67ec6 --- /dev/null +++ b/arch/x86/boot/cpuflags.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "bitops.h" + +#include +#include +#include +#include "cpuflags.h" + +struct cpu_features cpu; +u32 cpu_vendor[3]; + +static bool loaded_flags; + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + unsigned long cr0; + + asm volatile("mov %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +/* + * For building the 16-bit code we want to explicitly specify 32-bit + * push/pop operations, rather than just saying 'pushf' or 'popf' and + * letting the compiler choose. But this is also included from the + * compressed/ directory where it may be 64-bit code, and thus needs + * to be 'pushfq' or 'popfq' in that case. + */ +#ifdef __x86_64__ +#define PUSHF "pushfq" +#define POPF "popfq" +#else +#define PUSHF "pushfl" +#define POPF "popfl" +#endif + +int has_eflag(unsigned long mask) +{ + unsigned long f0, f1; + + asm volatile(PUSHF " \n\t" + PUSHF " \n\t" + "pop %0 \n\t" + "mov %0,%1 \n\t" + "xor %2,%1 \n\t" + "push %1 \n\t" + POPF " \n\t" + PUSHF " \n\t" + "pop %1 \n\t" + POPF + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +/* Handle x86_32 PIC using ebx. */ +#if defined(__i386__) && defined(__PIC__) +# define EBX_REG "=r" +#else +# define EBX_REG "=b" +#endif + +void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d) +{ + asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" + "cpuid \n\t" + ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" + : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) + : "a" (id), "c" (count) + ); +} + +#define cpuid(id, a, b, c, d) cpuid_count(id, 0, a, b, c, d) + +void get_cpuflags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + u32 ignored; + + if (loaded_flags) + return; + loaded_flags = true; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], + &cpu_vendor[1]); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + cpuid(0x1, &tfms, &ignored, &cpu.flags[4], + &cpu.flags[0]); + cpu.level = (tfms >> 8) & 15; + cpu.family = cpu.level; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + if (max_intel_level >= 0x00000007) { + cpuid_count(0x00000007, 0, &ignored, &ignored, + &cpu.flags[16], &ignored); + } + + cpuid(0x80000000, &max_amd_level, &ignored, &ignored, + &ignored); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], + &cpu.flags[1]); + } + } +} diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 000000000..475b8fde9 --- /dev/null +++ b/arch/x86/boot/cpuflags.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_CPUFLAGS_H +#define BOOT_CPUFLAGS_H + +#include +#include + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int family; /* Family, always */ + int model; + u32 flags[NCAPINTS]; +}; + +extern struct cpu_features cpu; +extern u32 cpu_vendor[3]; + +int has_eflag(unsigned long mask); +void get_cpuflags(void); +void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d); + +#endif diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h new file mode 100644 index 000000000..8f5ef2994 --- /dev/null +++ b/arch/x86/boot/ctype.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_CTYPE_H +#define BOOT_CTYPE_H + +static inline int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +static inline int isxdigit(int ch) +{ + if (isdigit(ch)) + return true; + + if ((ch >= 'a') && (ch <= 'f')) + return true; + + return (ch >= 'A') && (ch <= 'F'); +} + +#endif diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c new file mode 100644 index 000000000..023bf1c3d --- /dev/null +++ b/arch/x86/boot/early_serial_console.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Serial port routines for use during early boot reporting. This code is + * included from both the compressed kernel and the regular kernel. + */ +#include "boot.h" + +#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ + +#define DLAB 0x80 + +#define TXR 0 /* Transmit register (WRITE) */ +#define RXR 0 /* Receive register (READ) */ +#define IER 1 /* Interrupt Enable */ +#define IIR 2 /* Interrupt ID */ +#define FCR 2 /* FIFO control */ +#define LCR 3 /* Line control */ +#define MCR 4 /* Modem control */ +#define LSR 5 /* Line Status */ +#define MSR 6 /* Modem Status */ +#define DLL 0 /* Divisor Latch Low */ +#define DLH 1 /* Divisor latch High */ + +#define DEFAULT_BAUD 9600 + +static void early_serial_init(int port, int baud) +{ + unsigned char c; + unsigned divisor; + + outb(0x3, port + LCR); /* 8n1 */ + outb(0, port + IER); /* no interrupt */ + outb(0, port + FCR); /* no fifo */ + outb(0x3, port + MCR); /* DTR + RTS */ + + divisor = 115200 / baud; + c = inb(port + LCR); + outb(c | DLAB, port + LCR); + outb(divisor & 0xff, port + DLL); + outb((divisor >> 8) & 0xff, port + DLH); + outb(c & ~DLAB, port + LCR); + + early_serial_base = port; +} + +static void parse_earlyprintk(void) +{ + int baud = DEFAULT_BAUD; + char arg[32]; + int pos = 0; + int port = 0; + + if (cmdline_find_option("earlyprintk", arg, sizeof(arg)) > 0) { + char *e; + + if (!strncmp(arg, "serial", 6)) { + port = DEFAULT_SERIAL_PORT; + pos += 6; + } + + if (arg[pos] == ',') + pos++; + + /* + * make sure we have + * "serial,0x3f8,115200" + * "serial,ttyS0,115200" + * "ttyS0,115200" + */ + if (pos == 7 && !strncmp(arg + pos, "0x", 2)) { + port = simple_strtoull(arg + pos, &e, 16); + if (port == 0 || arg + pos == e) + port = DEFAULT_SERIAL_PORT; + else + pos = e - arg; + } else if (!strncmp(arg + pos, "ttyS", 4)) { + static const int bases[] = { 0x3f8, 0x2f8 }; + int idx = 0; + + /* += strlen("ttyS"); */ + pos += 4; + + if (arg[pos++] == '1') + idx = 1; + + port = bases[idx]; + } + + if (arg[pos] == ',') + pos++; + + baud = simple_strtoull(arg + pos, &e, 0); + if (baud == 0 || arg + pos == e) + baud = DEFAULT_BAUD; + } + + if (port) + early_serial_init(port, baud); +} + +#define BASE_BAUD (1843200/16) +static unsigned int probe_baud(int port) +{ + unsigned char lcr, dll, dlh; + unsigned int quot; + + lcr = inb(port + LCR); + outb(lcr | DLAB, port + LCR); + dll = inb(port + DLL); + dlh = inb(port + DLH); + outb(lcr, port + LCR); + quot = (dlh << 8) | dll; + + return BASE_BAUD / quot; +} + +static void parse_console_uart8250(void) +{ + char optstr[64], *options; + int baud = DEFAULT_BAUD; + int port = 0; + + /* + * console=uart8250,io,0x3f8,115200n8 + * need to make sure it is last one console ! + */ + if (cmdline_find_option("console", optstr, sizeof(optstr)) <= 0) + return; + + options = optstr; + + if (!strncmp(options, "uart8250,io,", 12)) + port = simple_strtoull(options + 12, &options, 0); + else if (!strncmp(options, "uart,io,", 8)) + port = simple_strtoull(options + 8, &options, 0); + else + return; + + if (options && (options[0] == ',')) + baud = simple_strtoull(options + 1, &options, 0); + else + baud = probe_baud(port); + + if (port) + early_serial_init(port, baud); +} + +void console_init(void) +{ + parse_earlyprintk(); + + if (!early_serial_base) + parse_console_uart8250(); +} diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c new file mode 100644 index 000000000..1fb4bc70c --- /dev/null +++ b/arch/x86/boot/edd.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Get EDD BIOS disk information + */ + +#include "boot.h" +#include +#include "string.h" + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +/* + * Read the MBR (first sector) from a specific device. + */ +static int read_mbr(u8 devno, void *buf) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0x0201; /* Legacy Read, one sector */ + ireg.cx = 0x0001; /* Sector 0-0-1 */ + ireg.dl = devno; + ireg.bx = (size_t)buf; + + intcall(0x13, &ireg, &oreg); + + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ +} + +static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) +{ + int sector_size; + char *mbrbuf_ptr, *mbrbuf_end; + u32 buf_base, mbr_base; + extern char _end[]; + u16 mbr_magic; + + sector_size = ei->params.bytes_per_sector; + if (!sector_size) + sector_size = 512; /* Best available guess */ + + /* Produce a naturally aligned buffer on the heap */ + buf_base = (ds() << 4) + (u32)&_end; + mbr_base = (buf_base+sector_size-1) & ~(sector_size-1); + mbrbuf_ptr = _end + (mbr_base-buf_base); + mbrbuf_end = mbrbuf_ptr + sector_size; + + /* Make sure we actually have space on the heap... */ + if (!(boot_params.hdr.loadflags & CAN_USE_HEAP)) + return -1; + if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr) + return -1; + + memset(mbrbuf_ptr, 0, sector_size); + if (read_mbr(devno, mbrbuf_ptr)) + return -1; + + *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET]; + mbr_magic = *(u16 *)&mbrbuf_ptr[510]; + + /* check for valid MBR magic */ + return mbr_magic == 0xAA55 ? 0 : -1; +} + +static int get_edd_info(u8 devno, struct edd_info *ei) +{ + struct biosregs ireg, oreg; + + memset(ei, 0, sizeof(*ei)); + + /* Check Extensions Present */ + + initregs(&ireg); + ireg.ah = 0x41; + ireg.bx = EDDMAGIC1; + ireg.dl = devno; + intcall(0x13, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) + return -1; /* No extended information */ + + if (oreg.bx != EDDMAGIC2) + return -1; + + ei->device = devno; + ei->version = oreg.ah; /* EDD version number */ + ei->interface_support = oreg.cx; /* EDD functionality subsets */ + + /* Extended Get Device Parameters */ + + ei->params.length = sizeof(ei->params); + ireg.ah = 0x48; + ireg.si = (size_t)&ei->params; + intcall(0x13, &ireg, &oreg); + + /* Get legacy CHS parameters */ + + /* Ralf Brown recommends setting ES:DI to 0:0 */ + ireg.ah = 0x08; + ireg.es = 0; + intcall(0x13, &ireg, &oreg); + + if (!(oreg.eflags & X86_EFLAGS_CF)) { + ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2); + ei->legacy_max_head = oreg.dh; + ei->legacy_sectors_per_track = oreg.cl & 0x3f; + } + + return 0; +} + +void query_edd(void) +{ + char eddarg[8]; + int do_mbr = 1; +#ifdef CONFIG_EDD_OFF + int do_edd = 0; +#else + int do_edd = 1; +#endif + int be_quiet; + int devno; + struct edd_info ei, *edp; + u32 *mbrptr; + + if (cmdline_find_option("edd", eddarg, sizeof(eddarg)) > 0) { + if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) { + do_edd = 1; + do_mbr = 0; + } + else if (!strcmp(eddarg, "off")) + do_edd = 0; + else if (!strcmp(eddarg, "on")) + do_edd = 1; + } + + be_quiet = cmdline_find_option_bool("quiet"); + + edp = boot_params.eddbuf; + mbrptr = boot_params.edd_mbr_sig_buffer; + + if (!do_edd) + return; + + /* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe, + * so give a hint if this happens. + */ + + if (!be_quiet) + printf("Probing EDD (edd=off to disable)... "); + + for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) { + /* + * Scan the BIOS-supported hard disks and query EDD + * information... + */ + if (!get_edd_info(devno, &ei) + && boot_params.eddbuf_entries < EDDMAXNR) { + memcpy(edp, &ei, sizeof(ei)); + edp++; + boot_params.eddbuf_entries++; + } + + if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++)) + boot_params.edd_mbr_sig_buf_entries = devno-0x80+1; + } + + if (!be_quiet) + printf("ok\n"); +} + +#endif diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh new file mode 100644 index 000000000..c9299aeb7 --- /dev/null +++ b/arch/x86/boot/genimage.sh @@ -0,0 +1,272 @@ +#!/bin/bash +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2017 by Changbin Du +# +# Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others +# +# "make fdimage/fdimage144/fdimage288/hdimage/isoimage" +# script for x86 architecture +# +# Arguments: +# $1 - fdimage format +# $2 - target image file +# $3 - kernel bzImage file +# $4 - mtools configuration file +# $5 - kernel cmdline +# $6+ - initrd image file(s) +# +# This script requires: +# bash +# syslinux +# mtools (for fdimage* and hdimage) +# edk2/OVMF (for hdimage) +# +# Otherwise try to stick to POSIX shell commands... +# + +# Use "make V=1" to debug this script +case "${KBUILD_VERBOSE}" in +*1*) + set -x + ;; +esac + +# Exit the top-level shell with an error +topshell=$$ +trap 'exit 1' USR1 +die() { + echo "" 1>&2 + echo " *** $*" 1>&2 + echo "" 1>&2 + kill -USR1 $topshell +} + +# Verify the existence and readability of a file +verify() { + if [ ! -f "$1" -o ! -r "$1" ]; then + die "Missing file: $1" + fi +} + +diskfmt="$1" +FIMAGE="$2" +FBZIMAGE="$3" +MTOOLSRC="$4" +KCMDLINE="$5" +shift 5 # Remaining arguments = initrd files + +export MTOOLSRC + +# common options for dd +dd='dd iflag=fullblock' + +# Make sure the files actually exist +verify "$FBZIMAGE" + +declare -a FDINITRDS +irdpfx=' initrd=' +initrdopts_syslinux='' +initrdopts_efi='' +for f in "$@"; do + if [ -f "$f" -a -r "$f" ]; then + FDINITRDS=("${FDINITRDS[@]}" "$f") + fname="$(basename "$f")" + initrdopts_syslinux="${initrdopts_syslinux}${irdpfx}${fname}" + irdpfx=, + initrdopts_efi="${initrdopts_efi} initrd=${fname}" + fi +done + +# Read a $3-byte littleendian unsigned value at offset $2 from file $1 +le() { + local n=0 + local m=1 + for b in $(od -A n -v -j $2 -N $3 -t u1 "$1"); do + n=$((n + b*m)) + m=$((m * 256)) + done + echo $n +} + +# Get the EFI architecture name such that boot{name}.efi is the default +# boot file name. Returns false with no output if the file is not an +# EFI image or otherwise unknown. +efiarch() { + [ -f "$1" ] || return + [ $(le "$1" 0 2) -eq 23117 ] || return # MZ magic + peoffs=$(le "$1" 60 4) # PE header offset + [ $peoffs -ge 64 ] || return + [ $(le "$1" $peoffs 4) -eq 17744 ] || return # PE magic + case $(le "$1" $((peoffs+4+20)) 2) in # PE type + 267) ;; # PE32 + 523) ;; # PE32+ + *) return 1 ;; # Invalid + esac + [ $(le "$1" $((peoffs+4+20+68)) 2) -eq 10 ] || return # EFI app + case $(le "$1" $((peoffs+4)) 2) in # Machine type + 332) echo i386 ;; + 450) echo arm ;; + 512) echo ia64 ;; + 20530) echo riscv32 ;; + 20580) echo riscv64 ;; + 20776) echo riscv128 ;; + 34404) echo x64 ;; + 43620) echo aa64 ;; + esac +} + +# Get the combined sizes in bytes of the files given, counting sparse +# files as full length, and padding each file to cluster size +cluster=16384 +filesizes() { + local t=0 + local s + for s in $(ls -lnL "$@" 2>/dev/null | awk '/^-/{ print $5; }'); do + t=$((t + ((s+cluster-1)/cluster)*cluster)) + done + echo $t +} + +# Expand directory names which should be in /usr/share into a list +# of possible alternatives +sharedirs() { + local dir file + for dir in /usr/share /usr/lib64 /usr/lib; do + for file; do + echo "$dir/$file" + echo "$dir/${file^^}" + done + done +} +efidirs() { + local dir file + for dir in /usr/share /boot /usr/lib64 /usr/lib; do + for file; do + echo "$dir/$file" + echo "$dir/${file^^}" + done + done +} + +findsyslinux() { + local f="$(find -L $(sharedirs syslinux isolinux) \ + -name "$1" -readable -type f -print -quit 2>/dev/null)" + if [ ! -f "$f" ]; then + die "Need a $1 file, please install syslinux/isolinux." + fi + echo "$f" + return 0 +} + +findovmf() { + local arch="$1" + shift + local -a names=(-false) + local name f + for name; do + names=("${names[@]}" -or -iname "$name") + done + for f in $(find -L $(efidirs edk2 ovmf) \ + \( "${names[@]}" \) -readable -type f \ + -print 2>/dev/null); do + if [ "$(efiarch "$f")" = "$arch" ]; then + echo "$f" + return 0 + fi + done + die "Need a $1 file for $arch, please install EDK2/OVMF." +} + +do_mcopy() { + if [ ${#FDINITRDS[@]} -gt 0 ]; then + mcopy "${FDINITRDS[@]}" "$1" + fi + if [ -n "$efishell" ]; then + mmd "$1"EFI "$1"EFI/Boot + mcopy "$efishell" "$1"EFI/Boot/boot${kefiarch}.efi + fi + if [ -n "$kefiarch" ]; then + echo linux "$KCMDLINE$initrdopts_efi" | \ + mcopy - "$1"startup.nsh + fi + echo default linux "$KCMDLINE$initrdopts_syslinux" | \ + mcopy - "$1"syslinux.cfg + mcopy "$FBZIMAGE" "$1"linux +} + +genbzdisk() { + verify "$MTOOLSRC" + mformat -v 'LINUX_BOOT' a: + syslinux "$FIMAGE" + do_mcopy a: +} + +genfdimage144() { + verify "$MTOOLSRC" + $dd if=/dev/zero of="$FIMAGE" bs=1024 count=1440 2>/dev/null + mformat -v 'LINUX_BOOT' v: + syslinux "$FIMAGE" + do_mcopy v: +} + +genfdimage288() { + verify "$MTOOLSRC" + $dd if=/dev/zero of="$FIMAGE" bs=1024 count=2880 2>/dev/null + mformat -v 'LINUX_BOOT' w: + syslinux "$FIMAGE" + do_mcopy w: +} + +genhdimage() { + verify "$MTOOLSRC" + mbr="$(findsyslinux mbr.bin)" + kefiarch="$(efiarch "$FBZIMAGE")" + if [ -n "$kefiarch" ]; then + # The efishell provides command line handling + efishell="$(findovmf $kefiarch shell.efi shell${kefiarch}.efi)" + ptype='-T 0xef' # EFI system partition, no GPT + fi + sizes=$(filesizes "$FBZIMAGE" "${FDINITRDS[@]}" "$efishell") + # Allow 1% + 2 MiB for filesystem and partition table overhead, + # syslinux, and config files; this is probably excessive... + megs=$(((sizes + sizes/100 + 2*1024*1024 - 1)/(1024*1024))) + $dd if=/dev/zero of="$FIMAGE" bs=$((1024*1024)) count=$megs 2>/dev/null + mpartition -I -c -s 32 -h 64 $ptype -b 64 -a p: + $dd if="$mbr" of="$FIMAGE" bs=440 count=1 conv=notrunc 2>/dev/null + mformat -v 'LINUX_BOOT' -s 32 -h 64 -c $((cluster/512)) -t $megs h: + syslinux --offset $((64*512)) "$FIMAGE" + do_mcopy h: +} + +geniso() { + tmp_dir="$(dirname "$FIMAGE")/isoimage" + rm -rf "$tmp_dir" + mkdir "$tmp_dir" + isolinux=$(findsyslinux isolinux.bin) + ldlinux=$(findsyslinux ldlinux.c32) + cp "$isolinux" "$ldlinux" "$tmp_dir" + cp "$FBZIMAGE" "$tmp_dir"/linux + echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg + cp "${FDINITRDS[@]}" "$tmp_dir"/ + genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \ + -quiet -o "$FIMAGE" -b isolinux.bin \ + -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table "$tmp_dir" + isohybrid "$FIMAGE" 2>/dev/null || true + rm -rf "$tmp_dir" +} + +rm -f "$FIMAGE" + +case "$diskfmt" in + bzdisk) genbzdisk;; + fdimage144) genfdimage144;; + fdimage288) genfdimage288;; + hdimage) genhdimage;; + isoimage) geniso;; + *) die "Unknown image format: $diskfmt";; +esac diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S new file mode 100644 index 000000000..f912d7770 --- /dev/null +++ b/arch/x86/boot/header.S @@ -0,0 +1,660 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * header.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Based on bootsect.S and setup.S + * modified by more people than can be counted + * + * Rewritten as a common file by H. Peter Anvin (Apr 2007) + * + * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment + * addresses must be multiplied by 16 to obtain their respective linear + * addresses. To avoid confusion, linear addresses are written using leading + * hex while segment addresses are written as segment:offset. + * + */ +#include +#include +#include +#include +#include +#include +#include "boot.h" +#include "voffset.h" +#include "zoffset.h" + +BOOTSEG = 0x07C0 /* original address of boot-sector */ +SYSSEG = 0x1000 /* historical load address >> 4 */ + +#ifndef SVGA_MODE +#define SVGA_MODE ASK_VGA +#endif + +#ifndef ROOT_RDONLY +#define ROOT_RDONLY 1 +#endif + + .code16 + .section ".bstext", "ax" + + .global bootsect_start +bootsect_start: +#ifdef CONFIG_EFI_STUB + # "MZ", MS-DOS header + .word MZ_MAGIC +#endif + + # Normalize the start address + ljmp $BOOTSEG, $start2 + +start2: + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + xorw %sp, %sp + sti + cld + + movw $bugger_off_msg, %si + +msg_loop: + lodsb + andb %al, %al + jz bs_die + movb $0xe, %ah + movw $7, %bx + int $0x10 + jmp msg_loop + +bs_die: + # Allow the user to press a key, then reboot + xorw %ax, %ax + int $0x16 + int $0x19 + + # int 0x19 should never return. In case it does anyway, + # invoke the BIOS reset code... + ljmp $0xf000,$0xfff0 + +#ifdef CONFIG_EFI_STUB + .org 0x3c + # + # Offset to the PE header. + # + .long pe_header +#endif /* CONFIG_EFI_STUB */ + + .section ".bsdata", "a" +bugger_off_msg: + .ascii "Use a boot loader.\r\n" + .ascii "\n" + .ascii "Remove disk and press any key to reboot...\r\n" + .byte 0 + +#ifdef CONFIG_EFI_STUB +pe_header: + .long PE_MAGIC + +coff_header: +#ifdef CONFIG_X86_32 + .set image_file_add_flags, IMAGE_FILE_32BIT_MACHINE + .set pe_opt_magic, PE_OPT_MAGIC_PE32 + .word IMAGE_FILE_MACHINE_I386 +#else + .set image_file_add_flags, 0 + .set pe_opt_magic, PE_OPT_MAGIC_PE32PLUS + .word IMAGE_FILE_MACHINE_AMD64 +#endif + .word section_count # nr_sections + .long 0 # TimeDateStamp + .long 0 # PointerToSymbolTable + .long 1 # NumberOfSymbols + .word section_table - optional_header # SizeOfOptionalHeader + .word IMAGE_FILE_EXECUTABLE_IMAGE | \ + image_file_add_flags | \ + IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_LINE_NUMS_STRIPPED # Characteristics + +optional_header: + .word pe_opt_magic + .byte 0x02 # MajorLinkerVersion + .byte 0x14 # MinorLinkerVersion + + # Filled in by build.c + .long 0 # SizeOfCode + + .long 0 # SizeOfInitializedData + .long 0 # SizeOfUninitializedData + + # Filled in by build.c + .long 0x0000 # AddressOfEntryPoint + + .long 0x0200 # BaseOfCode +#ifdef CONFIG_X86_32 + .long 0 # data +#endif + +extra_header_fields: + # PE specification requires ImageBase to be 64k aligned + .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff +#ifdef CONFIG_X86_32 + .long image_base # ImageBase +#else + .quad image_base # ImageBase +#endif + .long 0x20 # SectionAlignment + .long 0x20 # FileAlignment + .word 0 # MajorOperatingSystemVersion + .word 0 # MinorOperatingSystemVersion + .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion + .word LINUX_EFISTUB_MINOR_VERSION # MinorImageVersion + .word 0 # MajorSubsystemVersion + .word 0 # MinorSubsystemVersion + .long 0 # Win32VersionValue + + # + # The size of the bzImage is written in tools/build.c + # + .long 0 # SizeOfImage + + .long 0x200 # SizeOfHeaders + .long 0 # CheckSum + .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) +#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES + .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics +#else + .word 0 # DllCharacteristics +#endif +#ifdef CONFIG_X86_32 + .long 0 # SizeOfStackReserve + .long 0 # SizeOfStackCommit + .long 0 # SizeOfHeapReserve + .long 0 # SizeOfHeapCommit +#else + .quad 0 # SizeOfStackReserve + .quad 0 # SizeOfStackCommit + .quad 0 # SizeOfHeapReserve + .quad 0 # SizeOfHeapCommit +#endif + .long 0 # LoaderFlags + .long (section_table - .) / 8 # NumberOfRvaAndSizes + + .quad 0 # ExportTable + .quad 0 # ImportTable + .quad 0 # ResourceTable + .quad 0 # ExceptionTable + .quad 0 # CertificationTable + .quad 0 # BaseRelocationTable + + # Section table +section_table: + # + # The offset & size fields are filled in by build.c. + # + .ascii ".setup" + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE | \ + IMAGE_SCN_ALIGN_16BYTES # Characteristics + + # + # The EFI application loader requires a relocation section + # because EFI applications must be relocatable. The .reloc + # offset & size fields are filled in by build.c. + # + .ascii ".reloc" + .byte 0 + .byte 0 + .long 0 + .long 0 + .long 0 # SizeOfRawData + .long 0 # PointerToRawData + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_DISCARDABLE | \ + IMAGE_SCN_ALIGN_1BYTES # Characteristics + +#ifdef CONFIG_EFI_MIXED + # + # The offset & size fields are filled in by build.c. + # + .asciz ".compat" + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_DISCARDABLE | \ + IMAGE_SCN_ALIGN_1BYTES # Characteristics +#endif + + # + # The offset & size fields are filled in by build.c. + # + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE | \ + IMAGE_SCN_ALIGN_16BYTES # Characteristics + + .set section_count, (. - section_table) / 40 +#endif /* CONFIG_EFI_STUB */ + + # Kernel attributes; used by setup. This is part 1 of the + # header, from the old boot sector. + + .section ".header", "a" + .globl sentinel +sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ + + .globl hdr +hdr: +setup_sects: .byte 0 /* Filled in by build.c */ +root_flags: .word ROOT_RDONLY +syssize: .long 0 /* Filled in by build.c */ +ram_size: .word 0 /* Obsolete */ +vid_mode: .word SVGA_MODE +root_dev: .word 0 /* Filled in by build.c */ +boot_flag: .word 0xAA55 + + # offset 512, entry point + + .globl _start +_start: + # Explicitly enter this as bytes, or the assembler + # tries to generate a 3-byte jump here, which causes + # everything else to push off to the wrong offset. + .byte 0xeb # short (2-byte) jump + .byte start_of_setup-1f +1: + + # Part 2 of the header, from the old setup.S + + .ascii "HdrS" # header signature + .word 0x020f # header version number (>= 0x0105) + # or else old loadlin-1.5 will fail) + .globl realmode_swtch +realmode_swtch: .word 0, 0 # default_switch, SETUPSEG +start_sys_seg: .word SYSSEG # obsolete and meaningless, but just + # in case something decided to "use" it + .word kernel_version-512 # pointing to kernel version string + # above section of header is compatible + # with loadlin-1.5 (header v1.5). Don't + # change it. + +type_of_loader: .byte 0 # 0 means ancient bootloader, newer + # bootloaders know to change this. + # See Documentation/x86/boot.rst for + # assigned ids + +# flags, unused bits must be zero (RFU) bit within loadflags +loadflags: + .byte LOADED_HIGH # The kernel is to be loaded high + +setup_move_size: .word 0x8000 # size to move, when setup is not + # loaded at 0x90000. We will move setup + # to 0x90000 then just before jumping + # into the kernel. However, only the + # loader knows how much data behind + # us also needs to be loaded. + +code32_start: # here loaders can put a different + # start address for 32-bit code. + .long 0x100000 # 0x100000 = default for big kernel + +ramdisk_image: .long 0 # address of loaded ramdisk image + # Here the loader puts the 32-bit + # address where it loaded the image. + # This only will be read by the kernel. + +ramdisk_size: .long 0 # its size in bytes + +bootsect_kludge: + .long 0 # obsolete + +heap_end_ptr: .word _end+STACK_SIZE-512 + # (Header version 0x0201 or later) + # space from here (exclusive) down to + # end of setup code can be used by setup + # for local heap purposes. + +ext_loader_ver: + .byte 0 # Extended boot loader version +ext_loader_type: + .byte 0 # Extended boot loader type + +cmd_line_ptr: .long 0 # (Header version 0x0202 or later) + # If nonzero, a 32-bit pointer + # to the kernel command line. + # The command line should be + # located between the start of + # setup and the end of low + # memory (0xa0000), or it may + # get overwritten before it + # gets read. If this field is + # used, there is no longer + # anything magical about the + # 0x90000 segment; the setup + # can be located anywhere in + # low memory 0x10000 or higher. + +initrd_addr_max: .long 0x7fffffff + # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd + # The current kernel allows up to 4 GB, + # but leave it at 2 GB to avoid + # possible bootloader bugs. + +kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment + #required for protected mode + #kernel +#ifdef CONFIG_RELOCATABLE +relocatable_kernel: .byte 1 +#else +relocatable_kernel: .byte 0 +#endif +min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment + +xloadflags: +#ifdef CONFIG_X86_64 +# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */ +#else +# define XLF0 0 +#endif + +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) + /* kernel/boot_param/ramdisk could be loaded above 4g */ +# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G +#else +# define XLF1 0 +#endif + +#ifdef CONFIG_EFI_STUB +# ifdef CONFIG_EFI_MIXED +# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) +# else +# ifdef CONFIG_X86_64 +# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ +# else +# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */ +# endif +# endif +#else +# define XLF23 0 +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE) +# define XLF4 XLF_EFI_KEXEC +#else +# define XLF4 0 +#endif + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_5LEVEL +#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED) +#else +#define XLF56 XLF_5LEVEL +#endif +#else +#define XLF56 0 +#endif + + .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56 + +cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, + #added with boot protocol + #version 2.06 + +hardware_subarch: .long 0 # subarchitecture, added with 2.07 + # default to 0 for normal x86 PC + +hardware_subarch_data: .quad 0 + +payload_offset: .long ZO_input_data +payload_length: .long ZO_z_input_len + +setup_data: .quad 0 # 64-bit physical pointer to + # single linked list of + # struct setup_data + +pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr + +# +# Getting to provably safe in-place decompression is hard. Worst case +# behaviours need to be analyzed. Here let's take the decompression of +# a gzip-compressed kernel as example, to illustrate it: +# +# The file layout of gzip compressed kernel is: +# +# magic[2] +# method[1] +# flags[1] +# timestamp[4] +# extraflags[1] +# os[1] +# compressed data blocks[N] +# crc[4] orig_len[4] +# +# ... resulting in +18 bytes overhead of uncompressed data. +# +# (For more information, please refer to RFC 1951 and RFC 1952.) +# +# Files divided into blocks +# 1 bit (last block flag) +# 2 bits (block type) +# +# 1 block occurs every 32K -1 bytes or when there 50% compression +# has been achieved. The smallest block type encoding is always used. +# +# stored: +# 32 bits length in bytes. +# +# fixed: +# magic fixed tree. +# symbols. +# +# dynamic: +# dynamic tree encoding. +# symbols. +# +# +# The buffer for decompression in place is the length of the uncompressed +# data, plus a small amount extra to keep the algorithm safe. The +# compressed data is placed at the end of the buffer. The output pointer +# is placed at the start of the buffer and the input pointer is placed +# where the compressed data starts. Problems will occur when the output +# pointer overruns the input pointer. +# +# The output pointer can only overrun the input pointer if the input +# pointer is moving faster than the output pointer. A condition only +# triggered by data whose compressed form is larger than the uncompressed +# form. +# +# The worst case at the block level is a growth of the compressed data +# of 5 bytes per 32767 bytes. +# +# The worst case internal to a compressed block is very hard to figure. +# The worst case can at least be bounded by having one bit that represents +# 32764 bytes and then all of the rest of the bytes representing the very +# very last byte. +# +# All of which is enough to compute an amount of extra data that is required +# to be safe. To avoid problems at the block level allocating 5 extra bytes +# per 32767 bytes of data is sufficient. To avoid problems internal to a +# block adding an extra 32767 bytes (the worst case uncompressed block size) +# is sufficient, to ensure that in the worst case the decompressed data for +# block will stop the byte before the compressed data for a block begins. +# To avoid problems with the compressed data's meta information an extra 18 +# bytes are needed. Leading to the formula: +# +# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 +# +# Adding 8 bytes per 32K is a bit excessive but much easier to calculate. +# Adding 32768 instead of 32767 just makes for round numbers. +# +# Above analysis is for decompressing gzip compressed kernel only. Up to +# now 6 different decompressor are supported all together. And among them +# xz stores data in chunks and has maximum chunk of 64K. Hence safety +# margin should be updated to cover all decompressors so that we don't +# need to deal with each of them separately. Please check +# the description in lib/decompressor_xxx.c for specific information. +# +# extra_bytes = (uncompressed_size >> 12) + 65536 + 128 +# +# LZ4 is even worse: data that cannot be further compressed grows by 0.4%, +# or one byte per 256 bytes. OTOH, we can safely get rid of the +128 as +# the size-dependent part now grows so fast. +# +# extra_bytes = (uncompressed_size >> 8) + 65536 +# +# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22 +# byte fixed overhead but has a maximum block size of 128K, so it needs a +# larger margin. +# +# extra_bytes = (uncompressed_size >> 8) + 131072 + +#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 131072) +#if ZO_z_output_len > ZO_z_input_len +# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ + ZO_z_input_len) +#else +# define ZO_z_extract_offset ZO_z_extra_bytes +#endif + +/* + * The extract_offset has to be bigger than ZO head section. Otherwise when + * the head code is running to move ZO to the end of the buffer, it will + * overwrite the head code itself. + */ +#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset +# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095) +#else +# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095) +#endif + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset) + +#define VO_INIT_SIZE (VO__end - VO__text) +#if ZO_INIT_SIZE > VO_INIT_SIZE +# define INIT_SIZE ZO_INIT_SIZE +#else +# define INIT_SIZE VO_INIT_SIZE +#endif + +init_size: .long INIT_SIZE # kernel initialization size +handover_offset: .long 0 # Filled in by build.c +kernel_info_offset: .long 0 # Filled in by build.c + +# End of setup header ##################################################### + + .section ".entrytext", "ax" +start_of_setup: +# Force %es = %ds + movw %ds, %ax + movw %ax, %es + cld + +# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds, +# which happened to work by accident for the old code. Recalculate the stack +# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the +# stack behind its own code, so we can't blindly put it directly past the heap. + + movw %ss, %dx + cmpw %ax, %dx # %ds == %ss? + movw %sp, %dx + je 2f # -> assume %sp is reasonably set + + # Invalid %ss, make up a new stack + movw $_end, %dx + testb $CAN_USE_HEAP, loadflags + jz 1f + movw heap_end_ptr, %dx +1: addw $STACK_SIZE, %dx + jnc 2f + xorw %dx, %dx # Prevent wraparound + +2: # Now %dx should point to the end of our stack space + andw $~3, %dx # dword align (might as well...) + jnz 3f + movw $0xfffc, %dx # Make sure we're not zero +3: movw %ax, %ss + movzwl %dx, %esp # Clear upper half of %esp + sti # Now we should have a working stack + +# We will have entered with %cs = %ds+0x20, normalize %cs so +# it is on par with the other segments. + pushw %ds + pushw $6f + lretw +6: + +# Check signature at end of setup + cmpl $0x5a5aaa55, setup_sig + jne setup_bad + +# Zero the bss + movw $__bss_start, %di + movw $_end+3, %cx + xorl %eax, %eax + subw %di, %cx + shrw $2, %cx + rep; stosl + +# Jump to C code (should not return) + calll main + +# Setup corrupt somehow... +setup_bad: + movl $setup_corrupt, %eax + calll puts + # Fall through... + + .globl die + .type die, @function +die: + hlt + jmp die + + .size die, .-die + + .section ".initdata", "a" +setup_corrupt: + .byte 7 + .string "No setup signature found...\n" diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh new file mode 100755 index 000000000..0849f4b42 --- /dev/null +++ b/arch/x86/boot/install.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for i386 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +if [ -x /sbin/lilo ]; then + /sbin/lilo +elif [ -x /etc/lilo/install ]; then + /etc/lilo/install +else + sync + echo "Cannot find LILO." +fi diff --git a/arch/x86/boot/io.h b/arch/x86/boot/io.h new file mode 100644 index 000000000..110880907 --- /dev/null +++ b/arch/x86/boot/io.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_IO_H +#define BOOT_IO_H + +#include + +#undef inb +#undef inw +#undef inl +#undef outb +#undef outw +#undef outl + +struct port_io_ops { + u8 (*f_inb)(u16 port); + void (*f_outb)(u8 v, u16 port); + void (*f_outw)(u16 v, u16 port); +}; + +extern struct port_io_ops pio_ops; + +/* + * Use the normal I/O instructions by default. + * TDX guests override these to use hypercalls. + */ +static inline void init_default_io_ops(void) +{ + pio_ops.f_inb = __inb; + pio_ops.f_outb = __outb; + pio_ops.f_outw = __outw; +} + +/* + * Redirect port I/O operations via pio_ops callbacks. + * TDX guests override these callbacks with TDX-specific helpers. + */ +#define inb pio_ops.f_inb +#define outb pio_ops.f_outb +#define outw pio_ops.f_outw + +#endif diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c new file mode 100644 index 000000000..c4ea5258a --- /dev/null +++ b/arch/x86/boot/main.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Main module for the real-mode kernel code + */ +#include + +#include "boot.h" +#include "string.h" + +struct boot_params boot_params __attribute__((aligned(16))); + +struct port_io_ops pio_ops; + +char *HEAP = _end; +char *heap_end = _end; /* Default end of heap = no heap */ + +/* + * Copy the header into the boot parameter block. Since this + * screws up the old-style command line protocol, adjust by + * filling in the new-style command line pointer instead. + */ + +static void copy_boot_params(void) +{ + struct old_cmdline { + u16 cl_magic; + u16 cl_offset; + }; + const struct old_cmdline * const oldcmd = + absolute_pointer(OLD_CL_ADDRESS); + + BUILD_BUG_ON(sizeof(boot_params) != 4096); + memcpy(&boot_params.hdr, &hdr, sizeof(hdr)); + + if (!boot_params.hdr.cmd_line_ptr && + oldcmd->cl_magic == OLD_CL_MAGIC) { + /* Old-style command line protocol. */ + u16 cmdline_seg; + + /* Figure out if the command line falls in the region + of memory that an old kernel would have copied up + to 0x90000... */ + if (oldcmd->cl_offset < boot_params.hdr.setup_move_size) + cmdline_seg = ds(); + else + cmdline_seg = 0x9000; + + boot_params.hdr.cmd_line_ptr = + (cmdline_seg << 4) + oldcmd->cl_offset; + } +} + +/* + * Query the keyboard lock status as given by the BIOS, and + * set the keyboard repeat rate to maximum. Unclear why the latter + * is done here; this might be possible to kill off as stale code. + */ +static void keyboard_init(void) +{ + struct biosregs ireg, oreg; + initregs(&ireg); + + ireg.ah = 0x02; /* Get keyboard status */ + intcall(0x16, &ireg, &oreg); + boot_params.kbd_status = oreg.al; + + ireg.ax = 0x0305; /* Set keyboard repeat rate */ + intcall(0x16, &ireg, NULL); +} + +/* + * Get Intel SpeedStep (IST) information. + */ +static void query_ist(void) +{ + struct biosregs ireg, oreg; + + /* Some older BIOSes apparently crash on this call, so filter + it from machines too old to have SpeedStep at all. */ + if (cpu.level < 6) + return; + + initregs(&ireg); + ireg.ax = 0xe980; /* IST Support */ + ireg.edx = 0x47534943; /* Request value */ + intcall(0x15, &ireg, &oreg); + + boot_params.ist_info.signature = oreg.eax; + boot_params.ist_info.command = oreg.ebx; + boot_params.ist_info.event = oreg.ecx; + boot_params.ist_info.perf_level = oreg.edx; +} + +/* + * Tell the BIOS what CPU mode we intend to run in. + */ +static void set_bios_mode(void) +{ +#ifdef CONFIG_X86_64 + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0xec00; + ireg.bx = 2; + intcall(0x15, &ireg, NULL); +#endif +} + +static void init_heap(void) +{ + char *stack_end; + + if (boot_params.hdr.loadflags & CAN_USE_HEAP) { + asm("leal %P1(%%esp),%0" + : "=r" (stack_end) : "i" (-STACK_SIZE)); + + heap_end = (char *) + ((size_t)boot_params.hdr.heap_end_ptr + 0x200); + if (heap_end > stack_end) + heap_end = stack_end; + } else { + /* Boot protocol 2.00 only, no heap available */ + puts("WARNING: Ancient bootloader, some functionality " + "may be limited!\n"); + } +} + +void main(void) +{ + init_default_io_ops(); + + /* First, copy the boot header into the "zeropage" */ + copy_boot_params(); + + /* Initialize the early-boot console */ + console_init(); + if (cmdline_find_option_bool("debug")) + puts("early console in setup code\n"); + + /* End of heap check */ + init_heap(); + + /* Make sure we have all the proper CPU support */ + if (validate_cpu()) { + puts("Unable to boot - please use a kernel appropriate " + "for your CPU.\n"); + die(); + } + + /* Tell the BIOS what CPU mode we intend to run in. */ + set_bios_mode(); + + /* Detect memory layout */ + detect_memory(); + + /* Set keyboard repeat rate (why?) and query the lock flags */ + keyboard_init(); + + /* Query Intel SpeedStep (IST) information */ + query_ist(); + + /* Query APM information */ +#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) + query_apm_bios(); +#endif + + /* Query EDD information */ +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + query_edd(); +#endif + + /* Set the video mode */ + set_video(); + + /* Do the last things and invoke protected mode */ + go_to_protected_mode(); +} diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c new file mode 100644 index 000000000..b0422b79d --- /dev/null +++ b/arch/x86/boot/memory.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Memory detection code + */ + +#include "boot.h" + +#define SMAP 0x534d4150 /* ASCII "SMAP" */ + +static void detect_memory_e820(void) +{ + int count = 0; + struct biosregs ireg, oreg; + struct boot_e820_entry *desc = boot_params.e820_table; + static struct boot_e820_entry buf; /* static so it is zeroed */ + + initregs(&ireg); + ireg.ax = 0xe820; + ireg.cx = sizeof(buf); + ireg.edx = SMAP; + ireg.di = (size_t)&buf; + + /* + * Note: at least one BIOS is known which assumes that the + * buffer pointed to by one e820 call is the same one as + * the previous call, and only changes modified fields. Therefore, + * we use a temporary buffer and copy the results entry by entry. + * + * This routine deliberately does not try to account for + * ACPI 3+ extended attributes. This is because there are + * BIOSes in the field which report zero for the valid bit for + * all ranges, and we don't currently make any use of the + * other attribute bits. Revisit this if we see the extended + * attribute bits deployed in a meaningful way in the future. + */ + + do { + intcall(0x15, &ireg, &oreg); + ireg.ebx = oreg.ebx; /* for next iteration... */ + + /* BIOSes which terminate the chain with CF = 1 as opposed + to %ebx = 0 don't always report the SMAP signature on + the final, failing, probe. */ + if (oreg.eflags & X86_EFLAGS_CF) + break; + + /* Some BIOSes stop returning SMAP in the middle of + the search loop. We don't know exactly how the BIOS + screwed up the map at that point, we might have a + partial map, the full map, or complete garbage, so + just return failure. */ + if (oreg.eax != SMAP) { + count = 0; + break; + } + + *desc++ = buf; + count++; + } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_table)); + + boot_params.e820_entries = count; +} + +static void detect_memory_e801(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0xe801; + intcall(0x15, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) + return; + + /* Do we really need to do this? */ + if (oreg.cx || oreg.dx) { + oreg.ax = oreg.cx; + oreg.bx = oreg.dx; + } + + if (oreg.ax > 15*1024) { + return; /* Bogus! */ + } else if (oreg.ax == 15*1024) { + boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax; + } else { + /* + * This ignores memory above 16MB if we have a memory + * hole there. If someone actually finds a machine + * with a memory hole at 16MB and no support for + * 0E820h they should probably generate a fake e820 + * map. + */ + boot_params.alt_mem_k = oreg.ax; + } +} + +static void detect_memory_88(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x88; + intcall(0x15, &ireg, &oreg); + + boot_params.screen_info.ext_mem_k = oreg.ax; +} + +void detect_memory(void) +{ + detect_memory_e820(); + + detect_memory_e801(); + + detect_memory_88(); +} diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c new file mode 100644 index 000000000..da0ccc5de --- /dev/null +++ b/arch/x86/boot/mkcpustr.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* ----------------------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * This is a host program to preprocess the CPU strings into a + * compact format suitable for the setup code. + */ + +#include + +#include "../include/asm/required-features.h" +#include "../include/asm/disabled-features.h" +#include "../include/asm/cpufeatures.h" +#include "../include/asm/vmxfeatures.h" +#include "../kernel/cpu/capflags.c" + +int main(void) +{ + int i, j; + const char *str; + + printf("static const char x86_cap_strs[] =\n"); + + for (i = 0; i < NCAPINTS; i++) { + for (j = 0; j < 32; j++) { + str = x86_cap_flags[i*32+j]; + + if (i == NCAPINTS-1 && j == 31) { + /* The last entry must be unconditional; this + also consumes the compiler-added null + character */ + if (!str) + str = ""; + printf("\t\"\\x%02x\\x%02x\"\"%s\"\n", + i, j, str); + } else if (str) { + printf("#if REQUIRED_MASK%d & (1 << %d)\n" + "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n" + "#endif\n", + i, j, i, j, str); + } + } + } + printf("\t;\n"); + return 0; +} diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h new file mode 100644 index 000000000..aed66f7ae --- /dev/null +++ b/arch/x86/boot/msr.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Helpers/definitions related to MSR access. + */ + +#ifndef BOOT_MSR_H +#define BOOT_MSR_H + +#include + +/* + * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the + * boot kernel since they rely on tracepoint/exception handling infrastructure + * that's not available here. + */ +static inline void boot_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void boot_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + +#endif /* BOOT_MSR_H */ diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in new file mode 100644 index 000000000..174c60508 --- /dev/null +++ b/arch/x86/boot/mtools.conf.in @@ -0,0 +1,21 @@ +# +# mtools configuration file for "make (b)zdisk" +# + +# Actual floppy drive +drive a: + file="/dev/fd0" + +# 1.44 MB floppy disk image +drive v: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter + +# 2.88 MB floppy disk image (mostly for virtual uses) +drive w: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter + +# Hard disk (h: for the filesystem, p: for format - old mtools bug?) +drive h: + file="@OBJ@/hdimage" offset=32768 mformat_only +drive p: + file="@OBJ@/hdimage" partition=1 mformat_only diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c new file mode 100644 index 000000000..40031a614 --- /dev/null +++ b/arch/x86/boot/pm.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Prepare the machine for transition to protected mode. + */ + +#include "boot.h" +#include + +/* + * Invoke the realmode switch hook if present; otherwise + * disable all interrupts. + */ +static void realmode_switch_hook(void) +{ + if (boot_params.hdr.realmode_swtch) { + asm volatile("lcallw *%0" + : : "m" (boot_params.hdr.realmode_swtch) + : "eax", "ebx", "ecx", "edx"); + } else { + asm volatile("cli"); + outb(0x80, 0x70); /* Disable NMI */ + io_delay(); + } +} + +/* + * Disable all interrupts at the legacy PIC. + */ +static void mask_all_interrupts(void) +{ + outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */ + io_delay(); + outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */ + io_delay(); +} + +/* + * Reset IGNNE# if asserted in the FPU. + */ +static void reset_coprocessor(void) +{ + outb(0, 0xf0); + io_delay(); + outb(0, 0xf1); + io_delay(); +} + +/* + * Set up the GDT + */ + +struct gdt_ptr { + u16 len; + u32 ptr; +} __attribute__((packed)); + +static void setup_gdt(void) +{ + /* There are machines which are known to not boot with the GDT + being 8-byte unaligned. Intel recommends 16 byte alignment. */ + static const u64 boot_gdt[] __attribute__((aligned(16))) = { + /* CS: code, read/execute, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), + /* DS: data, read/write, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff), + /* TSS: 32-bit tss, 104 bytes, base 4096 */ + /* We only have a TSS here to keep Intel VT happy; + we don't actually use it for anything. */ + [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103), + }; + /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead + of the gdt_ptr contents. Thus, make it static so it will + stay in memory, at least long enough that we switch to the + proper kernel GDT. */ + static struct gdt_ptr gdt; + + gdt.len = sizeof(boot_gdt)-1; + gdt.ptr = (u32)&boot_gdt + (ds() << 4); + + asm volatile("lgdtl %0" : : "m" (gdt)); +} + +/* + * Set up the IDT + */ +static void setup_idt(void) +{ + static const struct gdt_ptr null_idt = {0, 0}; + asm volatile("lidtl %0" : : "m" (null_idt)); +} + +/* + * Actual invocation sequence + */ +void go_to_protected_mode(void) +{ + /* Hook before leaving real mode, also disables interrupts */ + realmode_switch_hook(); + + /* Enable the A20 gate */ + if (enable_a20()) { + puts("A20 gate not responding, unable to boot...\n"); + die(); + } + + /* Reset coprocessor (IGNNE#) */ + reset_coprocessor(); + + /* Mask all interrupts in the PIC */ + mask_all_interrupts(); + + /* Actual transition to protected mode... */ + setup_idt(); + setup_gdt(); + protected_mode_jump(boot_params.hdr.code32_start, + (u32)&boot_params + (ds() << 4)); +} diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S new file mode 100644 index 000000000..cbec8bd08 --- /dev/null +++ b/arch/x86/boot/pmjump.S @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * The actual transition into protected mode + */ + +#include +#include +#include +#include + + .text + .code16 + +/* + * void protected_mode_jump(u32 entrypoint, u32 bootparams); + */ +SYM_FUNC_START_NOALIGN(protected_mode_jump) + movl %edx, %esi # Pointer to boot_params table + + xorl %ebx, %ebx + movw %cs, %bx + shll $4, %ebx + addl %ebx, 2f + jmp 1f # Short jump to serialize on 386/486 +1: + + movw $__BOOT_DS, %cx + movw $__BOOT_TSS, %di + + movl %cr0, %edx + orb $X86_CR0_PE, %dl # Protected mode + movl %edx, %cr0 + + # Transition to 32-bit mode + .byte 0x66, 0xea # ljmpl opcode +2: .long .Lin_pm32 # offset + .word __BOOT_CS # segment +SYM_FUNC_END(protected_mode_jump) + + .code32 + .section ".text32","ax" +SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32) + # Set up data segments for flat 32-bit mode + movl %ecx, %ds + movl %ecx, %es + movl %ecx, %fs + movl %ecx, %gs + movl %ecx, %ss + # The 32-bit code sets up its own stack, but this way we do have + # a valid stack if some debugging hack wants to use it. + addl %ebx, %esp + + # Set up TR to make Intel VT happy + ltr %di + + # Clear registers to allow for future extensions to the + # 32-bit boot protocol + xorl %ecx, %ecx + xorl %edx, %edx + xorl %ebx, %ebx + xorl %ebp, %ebp + xorl %edi, %edi + + # Set up LDTR to make Intel VT happy + lldt %cx + + jmpl *%eax # Jump to the 32-bit entrypoint +SYM_FUNC_END(.Lin_pm32) diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c new file mode 100644 index 000000000..1237beeb9 --- /dev/null +++ b/arch/x86/boot/printf.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Oh, it's a waste of space, but oh-so-yummy for debugging. This + * version of printf() does not include 64-bit support. "Live with + * it." + * + */ + +#include "boot.h" + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SMALL 32 /* Must be 32 == 0x20 */ +#define SPECIAL 64 /* 0x */ + +#define __do_div(n, base) ({ \ +int __res; \ +__res = ((unsigned long) n) % (unsigned) base; \ +n = ((unsigned long) n) / (unsigned) base; \ +__res; }) + +static char *number(char *str, long num, int base, int size, int precision, + int type) +{ + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ + + char tmp[66]; + char c, sign, locase; + int i; + + /* locase = 0 or 0x20. ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters */ + locase = (type & SMALL); + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 16) + return NULL; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = (digits[__do_div(num, base)] | locase); + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base == 8) + *str++ = '0'; + else if (base == 16) { + *str++ = '0'; + *str++ = ('X' | locase); + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long num; + int i, base; + char *str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char)va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, unsigned long); + else if (qualifier == 'h') { + num = (unsigned short)va_arg(args, int); + if (flags & SIGN) + num = (short)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, unsigned int); + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str - buf; +} + +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsprintf(buf, fmt, args); + va_end(args); + return i; +} + +int printf(const char *fmt, ...) +{ + char printf_buf[1024]; + va_list args; + int printed; + + va_start(args, fmt); + printed = vsprintf(printf_buf, fmt, args); + va_end(args); + + puts(printf_buf); + + return printed; +} diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c new file mode 100644 index 000000000..55de6b309 --- /dev/null +++ b/arch/x86/boot/regs.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Simple helper function for initializing a register set. + * + * Note that this sets EFLAGS_CF in the input register set; this + * makes it easier to catch functions which do nothing but don't + * explicitly set CF. + */ + +#include "boot.h" +#include "string.h" + +void initregs(struct biosregs *reg) +{ + memset(reg, 0, sizeof(*reg)); + reg->eflags |= X86_EFLAGS_CF; + reg->ds = ds(); + reg->es = ds(); + reg->fs = fs(); + reg->gs = gs(); +} diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld new file mode 100644 index 000000000..49546c247 --- /dev/null +++ b/arch/x86/boot/setup.ld @@ -0,0 +1,66 @@ +/* + * setup.ld + * + * Linker script for the i386 setup code + */ +OUTPUT_FORMAT("elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + . = 0; + .bstext : { *(.bstext) } + .bsdata : { *(.bsdata) } + + . = 495; + .header : { *(.header) } + .entrytext : { *(.entrytext) } + .inittext : { *(.inittext) } + .initdata : { *(.initdata) } + __end_init = .; + + .text : { *(.text .text.*) } + .text32 : { *(.text32) } + + . = ALIGN(16); + .rodata : { *(.rodata*) } + + .videocards : { + video_cards = .; + *(.videocards) + video_cards_end = .; + } + + . = ALIGN(16); + .data : { *(.data*) } + + .signature : { + setup_sig = .; + LONG(0x5a5aaa55) + } + + + . = ALIGN(16); + .bss : + { + __bss_start = .; + *(.bss) + __bss_end = .; + } + . = ALIGN(16); + _end = .; + + /DISCARD/ : { + *(.note*) + } + + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ + . = ASSERT(_end <= 0x8000, "Setup too big!"); + . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + /* Necessary for the very-old-loader check to work... */ + . = ASSERT(__end_init <= 5*512, "init sections too big!"); + +} diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c new file mode 100644 index 000000000..8a3fff912 --- /dev/null +++ b/arch/x86/boot/string.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Very basic string functions + */ + +#include +#include +#include +#include +#include +#include "ctype.h" +#include "string.h" + +#define KSTRTOX_OVERFLOW (1U << 31) + +/* + * Undef these macros so that the functions that we provide + * here will have the correct names regardless of how string.h + * may have chosen to #define them. + */ +#undef memcpy +#undef memset +#undef memcmp + +int memcmp(const void *s1, const void *s2, size_t len) +{ + bool diff; + asm("repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +/* + * Clang may lower `memcmp == 0` to `bcmp == 0`. + */ +int bcmp(const void *s1, const void *s2, size_t len) +{ + return memcmp(s1, s2, len); +} + +int strcmp(const char *str1, const char *str2) +{ + const unsigned char *s1 = (const unsigned char *)str1; + const unsigned char *s2 = (const unsigned char *)str2; + int delta = 0; + + while (*s1 || *s2) { + delta = *s1 - *s2; + if (delta) + return delta; + s1++; + s2++; + } + return 0; +} + +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +size_t strnlen(const char *s, size_t maxlen) +{ + const char *es = s; + while (*es && maxlen) { + es++; + maxlen--; + } + + return (es - s); +} + +unsigned int atou(const char *s) +{ + unsigned int i = 0; + while (isdigit(*s)) + i = i * 10 + (*s++ - '0'); + return i; +} + +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static unsigned int simple_guess_base(const char *cp) +{ + if (cp[0] == '0') { + if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + return 16; + else + return 8; + } else { + return 10; + } +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) +{ + unsigned long long result = 0; + + if (!base) + base = simple_guess_base(cp); + + if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + cp += 2; + + while (isxdigit(*cp)) { + unsigned int value; + + value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + if (value >= base) + break; + result = result * base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + + return result; +} + +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} + +/** + * strlen - Find the length of a string + * @s: The string to be sized + */ +size_t strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +/** + * strchr - Find the first occurrence of the character c in the string s. + * @s: the string to be searched + * @c: the character to search for + */ +char *strchr(const char *s, int c) +{ + while (*s != (char)c) + if (*s++ == '\0') + return NULL; + return (char *)s; +} + +static inline u64 __div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + union { + u64 v64; + u32 v32[2]; + } d = { dividend }; + u32 upper; + + upper = d.v32[1]; + d.v32[1] = 0; + if (upper >= divisor) { + d.v32[1] = upper / divisor; + upper %= divisor; + } + asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) : + "rm" (divisor), "0" (d.v32[0]), "1" (upper)); + return d.v64; +} + +static inline u64 __div_u64(u64 dividend, u32 divisor) +{ + u32 remainder; + + return __div_u64_rem(dividend, divisor, &remainder); +} + +static inline char _tolower(const char c) +{ + return c | 0x20; +} + +static const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) +{ + if (*base == 0) { + if (s[0] == '0') { + if (_tolower(s[1]) == 'x' && isxdigit(s[2])) + *base = 16; + else + *base = 8; + } else + *base = 10; + } + if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') + s += 2; + return s; +} + +/* + * Convert non-negative integer string representation in explicitly given radix + * to an integer. + * Return number of characters consumed maybe or-ed with overflow bit. + * If overflow occurs, result integer (incorrect) is still returned. + * + * Don't you dare use this function. + */ +static unsigned int _parse_integer(const char *s, + unsigned int base, + unsigned long long *p) +{ + unsigned long long res; + unsigned int rv; + + res = 0; + rv = 0; + while (1) { + unsigned int c = *s; + unsigned int lc = c | 0x20; /* don't tolower() this line */ + unsigned int val; + + if ('0' <= c && c <= '9') + val = c - '0'; + else if ('a' <= lc && lc <= 'f') + val = lc - 'a' + 10; + else + break; + + if (val >= base) + break; + /* + * Check for overflow only if we are within range of + * it in the max base we support (16) + */ + if (unlikely(res & (~0ull << 60))) { + if (res > __div_u64(ULLONG_MAX - val, base)) + rv |= KSTRTOX_OVERFLOW; + } + res = res * base + val; + rv++; + s++; + } + *p = res; + return rv; +} + +static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + unsigned long long _res; + unsigned int rv; + + s = _parse_integer_fixup_radix(s, &base); + rv = _parse_integer(s, base, &_res); + if (rv & KSTRTOX_OVERFLOW) + return -ERANGE; + if (rv == 0) + return -EINVAL; + s += rv; + if (*s == '\n') + s++; + if (*s) + return -EINVAL; + *res = _res; + return 0; +} + +/** + * kstrtoull - convert a string to an unsigned long long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ +int kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + if (s[0] == '+') + s++; + return _kstrtoull(s, base, res); +} + +static int _kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} + +/** + * kstrtoul - convert a string to an unsigned long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the simple_strtoull. + */ +int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. + */ + if (sizeof(unsigned long) == sizeof(unsigned long long) && + __alignof__(unsigned long) == __alignof__(unsigned long long)) + return kstrtoull(s, base, (unsigned long long *)res); + else + return _kstrtoul(s, base, res); +} diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h new file mode 100644 index 000000000..e5d2c6b8c --- /dev/null +++ b/arch/x86/boot/string.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_STRING_H +#define BOOT_STRING_H + +/* Undef any of these macros coming from string_32.h. */ +#undef memcpy +#undef memset +#undef memcmp + +void *memcpy(void *dst, const void *src, size_t len); +void *memmove(void *dst, const void *src, size_t len); +void *memset(void *dst, int c, size_t len); +int memcmp(const void *s1, const void *s2, size_t len); +int bcmp(const void *s1, const void *s2, size_t len); + +/* Access builtin version by default. */ +#define memcpy(d,s,l) __builtin_memcpy(d,s,l) +#define memset(d,c,l) __builtin_memset(d,c,l) +#define memcmp __builtin_memcmp + +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern char *strchr(const char *s, int c); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); +long simple_strtol(const char *cp, char **endp, unsigned int base); + +int kstrtoull(const char *s, unsigned int base, unsigned long long *res); +int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res); +#endif /* BOOT_STRING_H */ diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore new file mode 100644 index 000000000..ae91f4d0d --- /dev/null +++ b/arch/x86/boot/tools/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +build diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c new file mode 100644 index 000000000..a3725ad46 --- /dev/null +++ b/arch/x86/boot/tools/build.c @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * Copyright (C) 2007 H. Peter Anvin + */ + +/* + * This file builds a disk-image from three different files: + * + * - setup: 8086 machine code, sets up system parm + * - system: 80386 code for actual system + * - zoffset.h: header with ZO_* defines + * + * It does some checking that all files are of the correct type, and writes + * the result to the specified destination, removing headers and padding to + * the right amount. It also writes some system data to stdout. + */ + +/* + * Changes by tytso to allow root device specification + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + * Cross compiling fixes by Gertjan van Wingerde, July 1996 + * Rewritten by Martin Mares, April 1997 + * Substantially overhauled by H. Peter Anvin, April 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; + +#define DEFAULT_MAJOR_ROOT 0 +#define DEFAULT_MINOR_ROOT 0 +#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT) + +/* Minimal number of setup sectors */ +#define SETUP_SECT_MIN 5 +#define SETUP_SECT_MAX 64 + +/* This must be large enough to hold the entire setup */ +u8 buf[SETUP_SECT_MAX*512]; + +#define PECOFF_RELOC_RESERVE 0x20 + +#ifdef CONFIG_EFI_MIXED +#define PECOFF_COMPAT_RESERVE 0x20 +#else +#define PECOFF_COMPAT_RESERVE 0x0 +#endif + +static unsigned long efi32_stub_entry; +static unsigned long efi64_stub_entry; +static unsigned long efi_pe_entry; +static unsigned long efi32_pe_entry; +static unsigned long kernel_info; +static unsigned long startup_64; +static unsigned long _ehead; +static unsigned long _end; + +/*----------------------------------------------------------------------*/ + +static const u32 crctab32[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d +}; + +static u32 partial_crc32_one(u8 c, u32 crc) +{ + return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); +} + +static u32 partial_crc32(const u8 *s, int len, u32 crc) +{ + while (len--) + crc = partial_crc32_one(*s++, crc); + return crc; +} + +static void die(const char * str, ...) +{ + va_list args; + va_start(args, str); + vfprintf(stderr, str, args); + va_end(args); + fputc('\n', stderr); + exit(1); +} + +static void usage(void) +{ + die("Usage: build setup system zoffset.h image"); +} + +#ifdef CONFIG_EFI_STUB + +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) +{ + unsigned int pe_header; + unsigned short num_sections; + u8 *section; + + pe_header = get_unaligned_le32(&buf[0x3c]); + num_sections = get_unaligned_le16(&buf[pe_header + 6]); + +#ifdef CONFIG_X86_32 + section = &buf[pe_header + 0xa8]; +#else + section = &buf[pe_header + 0xb8]; +#endif + + while (num_sections > 0) { + if (strncmp((char*)section, section_name, 8) == 0) { + /* section header size field */ + put_unaligned_le32(size, section + 0x8); + + /* section header vma field */ + put_unaligned_le32(vma, section + 0xc); + + /* section header 'size of initialised data' field */ + put_unaligned_le32(datasz, section + 0x10); + + /* section header 'file offset' field */ + put_unaligned_le32(offset, section + 0x14); + + break; + } + section += 0x28; + num_sections--; + } +} + +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + +static void update_pecoff_setup_and_reloc(unsigned int size) +{ + u32 setup_offset = 0x200; + u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE; +#ifdef CONFIG_EFI_MIXED + u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE; +#endif + u32 setup_size = reloc_offset - setup_offset; + + update_pecoff_section_header(".setup", setup_offset, setup_size); + update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); + + /* + * Modify .reloc section contents with a single entry. The + * relocation is applied to offset 10 of the relocation section. + */ + put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); + put_unaligned_le32(10, &buf[reloc_offset + 4]); + +#ifdef CONFIG_EFI_MIXED + update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); + + /* + * Put the IA-32 machine type (0x14c) and the associated entry point + * address in the .compat section, so loaders can figure out which other + * execution modes this image supports. + */ + buf[compat_offset] = 0x1; + buf[compat_offset + 1] = 0x8; + put_unaligned_le16(0x14c, &buf[compat_offset + 2]); + put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]); +#endif +} + +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, + unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int text_sz = file_sz - text_start; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* + * The PE/COFF loader may load the image at an address which is + * misaligned with respect to the kernel_alignment field in the setup + * header. + * + * In order to avoid relocating the kernel to correct the misalignment, + * add slack to allow the buffer to be aligned within the declared size + * of the image. + */ + bss_sz += CONFIG_PHYSICAL_ALIGN; + init_sz += CONFIG_PHYSICAL_ALIGN; + + /* + * Size of code: Subtract the size of the first sector (512 bytes) + * which includes the header. + */ + put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + /* + * Address of entry point for PE/COFF executable + */ + put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]); + + update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz, + text_sz, text_start); +} + +static int reserve_pecoff_reloc_section(int c) +{ + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + return PECOFF_RELOC_RESERVE; +} + +static void efi_stub_defaults(void) +{ + /* Defaults for old kernel */ +#ifdef CONFIG_X86_32 + efi_pe_entry = 0x10; +#else + efi_pe_entry = 0x210; + startup_64 = 0x200; +#endif +} + +static void efi_stub_entry_update(void) +{ + unsigned long addr = efi32_stub_entry; + +#ifdef CONFIG_X86_64 + /* Yes, this is really how we defined it :( */ + addr = efi64_stub_entry - 0x200; +#endif + +#ifdef CONFIG_EFI_MIXED + if (efi32_stub_entry != addr) + die("32-bit and 64-bit EFI entry points do not match\n"); +#endif + put_unaligned_le32(addr, &buf[0x264]); +} + +#else + +static inline void update_pecoff_setup_and_reloc(unsigned int size) {} +static inline void update_pecoff_text(unsigned int text_start, + unsigned int file_sz, + unsigned int init_sz) {} +static inline void efi_stub_defaults(void) {} +static inline void efi_stub_entry_update(void) {} + +static inline int reserve_pecoff_reloc_section(int c) +{ + return 0; +} +#endif /* CONFIG_EFI_STUB */ + +static int reserve_pecoff_compat_section(int c) +{ + /* Reserve 0x20 bytes for .compat section */ + memset(buf+c, 0, PECOFF_COMPAT_RESERVE); + return PECOFF_COMPAT_RESERVE; +} + +/* + * Parse zoffset.h and find the entry points. We could just #include zoffset.h + * but that would mean tools/build would have to be rebuilt every time. It's + * not as if parsing it is hard... + */ +#define PARSE_ZOFS(p, sym) do { \ + if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \ + sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \ +} while (0) + +static void parse_zoffset(char *fname) +{ + FILE *file; + char *p; + int c; + + file = fopen(fname, "r"); + if (!file) + die("Unable to open `%s': %m", fname); + c = fread(buf, 1, sizeof(buf) - 1, file); + if (ferror(file)) + die("read-error on `zoffset.h'"); + fclose(file); + buf[c] = 0; + + p = (char *)buf; + + while (p && *p) { + PARSE_ZOFS(p, efi32_stub_entry); + PARSE_ZOFS(p, efi64_stub_entry); + PARSE_ZOFS(p, efi_pe_entry); + PARSE_ZOFS(p, efi32_pe_entry); + PARSE_ZOFS(p, kernel_info); + PARSE_ZOFS(p, startup_64); + PARSE_ZOFS(p, _ehead); + PARSE_ZOFS(p, _end); + + p = strchr(p, '\n'); + while (p && (*p == '\r' || *p == '\n')) + p++; + } +} + +int main(int argc, char ** argv) +{ + unsigned int i, sz, setup_sectors, init_sz; + int c; + u32 sys_size; + struct stat sb; + FILE *file, *dest; + int fd; + void *kernel; + u32 crc = 0xffffffffUL; + + efi_stub_defaults(); + + if (argc != 5) + usage(); + parse_zoffset(argv[3]); + + dest = fopen(argv[4], "w"); + if (!dest) + die("Unable to write `%s': %m", argv[4]); + + /* Copy the setup code */ + file = fopen(argv[1], "r"); + if (!file) + die("Unable to open `%s': %m", argv[1]); + c = fread(buf, 1, sizeof(buf), file); + if (ferror(file)) + die("read-error on `setup'"); + if (c < 1024) + die("The setup must be at least 1024 bytes"); + if (get_unaligned_le16(&buf[510]) != 0xAA55) + die("Boot block hasn't got boot flag (0xAA55)"); + fclose(file); + + c += reserve_pecoff_compat_section(c); + c += reserve_pecoff_reloc_section(c); + + /* Pad unused space with zeros */ + setup_sectors = (c + 511) / 512; + if (setup_sectors < SETUP_SECT_MIN) + setup_sectors = SETUP_SECT_MIN; + i = setup_sectors*512; + memset(buf+c, 0, i-c); + + update_pecoff_setup_and_reloc(i); + + /* Set the default root device */ + put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); + + /* Open and stat the kernel file */ + fd = open(argv[2], O_RDONLY); + if (fd < 0) + die("Unable to open `%s': %m", argv[2]); + if (fstat(fd, &sb)) + die("Unable to stat `%s': %m", argv[2]); + sz = sb.st_size; + kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); + if (kernel == MAP_FAILED) + die("Unable to mmap '%s': %m", argv[2]); + /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ + sys_size = (sz + 15 + 4) / 16; +#ifdef CONFIG_EFI_STUB + /* + * COFF requires minimum 32-byte alignment of sections, and + * adding a signature is problematic without that alignment. + */ + sys_size = (sys_size + 1) & ~1; +#endif + + /* Patch the setup code with the appropriate size parameters */ + buf[0x1f1] = setup_sectors-1; + put_unaligned_le32(sys_size, &buf[0x1f4]); + + init_sz = get_unaligned_le32(&buf[0x260]); +#ifdef CONFIG_EFI_STUB + /* + * The decompression buffer will start at ImageBase. When relocating + * the compressed kernel to its end, we must ensure that the head + * section does not get overwritten. The head section occupies + * [i, i + _ehead), and the destination is [init_sz - _end, init_sz). + * + * At present these should never overlap, because 'i' is at most 32k + * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the + * calculation of INIT_SIZE in boot/header.S ensures that + * 'init_sz - _end' is at least 64k. + * + * For future-proofing, increase init_sz if necessary. + */ + + if (init_sz - _end < i + _ehead) { + init_sz = (i + _ehead + _end + 4095) & ~4095; + put_unaligned_le32(init_sz, &buf[0x260]); + } +#endif + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz); + + efi_stub_entry_update(); + + /* Update kernel_info offset. */ + put_unaligned_le32(kernel_info, &buf[0x268]); + + crc = partial_crc32(buf, i, crc); + if (fwrite(buf, 1, i, dest) != i) + die("Writing setup failed"); + + /* Copy the kernel code */ + crc = partial_crc32(kernel, sz, crc); + if (fwrite(kernel, 1, sz, dest) != sz) + die("Writing kernel failed"); + + /* Add padding leaving 4 bytes for the checksum */ + while (sz++ < (sys_size*16) - 4) { + crc = partial_crc32_one('\0', crc); + if (fwrite("\0", 1, 1, dest) != 1) + die("Writing padding failed"); + } + + /* Write the CRC */ + put_unaligned_le32(crc, buf); + if (fwrite(buf, 1, 4, dest) != 4) + die("Writing CRC failed"); + + /* Catch any delayed write failures */ + if (fclose(dest)) + die("Writing image failed"); + + close(fd); + + /* Everything is OK */ + return 0; +} diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c new file mode 100644 index 000000000..f7eb976b0 --- /dev/null +++ b/arch/x86/boot/tty.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Very simple screen and serial I/O + */ + +#include "boot.h" + +int early_serial_base; + +#define XMTRDY 0x20 + +#define TXR 0 /* Transmit register (WRITE) */ +#define LSR 5 /* Line Status */ + +/* + * These functions are in .inittext so they can be used to signal + * error during initialization. + */ + +static void __section(".inittext") serial_putchar(int ch) +{ + unsigned timeout = 0xffff; + + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + + outb(ch, early_serial_base + TXR); +} + +static void __section(".inittext") bios_putchar(int ch) +{ + struct biosregs ireg; + + initregs(&ireg); + ireg.bx = 0x0007; + ireg.cx = 0x0001; + ireg.ah = 0x0e; + ireg.al = ch; + intcall(0x10, &ireg, NULL); +} + +void __section(".inittext") putchar(int ch) +{ + if (ch == '\n') + putchar('\r'); /* \n -> \r\n */ + + bios_putchar(ch); + + if (early_serial_base != 0) + serial_putchar(ch); +} + +void __section(".inittext") puts(const char *str) +{ + while (*str) + putchar(*str++); +} + +/* + * Read the CMOS clock through the BIOS, and return the + * seconds in BCD. + */ + +static u8 gettime(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x02; + intcall(0x1a, &ireg, &oreg); + + return oreg.dh; +} + +/* + * Read from the keyboard + */ +int getchar(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + /* ireg.ah = 0x00; */ + intcall(0x16, &ireg, &oreg); + + return oreg.al; +} + +static int kbd_pending(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x01; + intcall(0x16, &ireg, &oreg); + + return !(oreg.eflags & X86_EFLAGS_ZF); +} + +void kbd_flush(void) +{ + for (;;) { + if (!kbd_pending()) + break; + getchar(); + } +} + +int getchar_timeout(void) +{ + int cnt = 30; + int t0, t1; + + t0 = gettime(); + + while (cnt) { + if (kbd_pending()) + return getchar(); + + t1 = gettime(); + if (t0 != t1) { + cnt--; + t0 = t1; + } + } + + return 0; /* Timeout! */ +} + diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c new file mode 100644 index 000000000..945383f0f --- /dev/null +++ b/arch/x86/boot/version.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Kernel version string + */ + +#include "boot.h" +#include +#include +#include + +const char kernel_version[] = + UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " + UTS_VERSION; diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h new file mode 100644 index 000000000..9e23fdffb --- /dev/null +++ b/arch/x86/boot/vesa.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* ----------------------------------------------------------------------- * + * + * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +#ifndef BOOT_VESA_H +#define BOOT_VESA_H + +typedef struct { + u16 off, seg; +} far_ptr; + +/* VESA General Information table */ +struct vesa_general_info { + u32 signature; /* 0 Magic number = "VESA" */ + u16 version; /* 4 */ + far_ptr vendor_string; /* 6 */ + u32 capabilities; /* 10 */ + far_ptr video_mode_ptr; /* 14 */ + u16 total_memory; /* 18 */ + + u8 reserved[236]; /* 20 */ +} __attribute__ ((packed)); + +#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) + +struct vesa_mode_info { + u16 mode_attr; /* 0 */ + u8 win_attr[2]; /* 2 */ + u16 win_grain; /* 4 */ + u16 win_size; /* 6 */ + u16 win_seg[2]; /* 8 */ + far_ptr win_scheme; /* 12 */ + u16 logical_scan; /* 16 */ + + u16 h_res; /* 18 */ + u16 v_res; /* 20 */ + u8 char_width; /* 22 */ + u8 char_height; /* 23 */ + u8 memory_planes; /* 24 */ + u8 bpp; /* 25 */ + u8 banks; /* 26 */ + u8 memory_layout; /* 27 */ + u8 bank_size; /* 28 */ + u8 image_planes; /* 29 */ + u8 page_function; /* 30 */ + + u8 rmask; /* 31 */ + u8 rpos; /* 32 */ + u8 gmask; /* 33 */ + u8 gpos; /* 34 */ + u8 bmask; /* 35 */ + u8 bpos; /* 36 */ + u8 resv_mask; /* 37 */ + u8 resv_pos; /* 38 */ + u8 dcm_info; /* 39 */ + + u32 lfb_ptr; /* 40 Linear frame buffer address */ + u32 offscreen_ptr; /* 44 Offscreen memory address */ + u16 offscreen_size; /* 48 */ + + u8 reserved[206]; /* 50 */ +} __attribute__ ((packed)); + +#endif /* LIB_SYS_VESA_H */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c new file mode 100644 index 000000000..6eb8c06bc --- /dev/null +++ b/arch/x86/boot/video-bios.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Standard video BIOS modes + * + * We have two options for this; silent and scanned. + */ + +#include "boot.h" +#include "video.h" + +static __videocard video_bios; + +/* Set a conventional BIOS mode */ +static int set_bios_mode(u8 mode); + +static int bios_set_mode(struct mode_info *mi) +{ + return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS); +} + +static int set_bios_mode(u8 mode) +{ + struct biosregs ireg, oreg; + u8 new_mode; + + initregs(&ireg); + ireg.al = mode; /* AH=0x00 Set Video Mode */ + intcall(0x10, &ireg, NULL); + + ireg.ah = 0x0f; /* Get Current Video Mode */ + intcall(0x10, &ireg, &oreg); + + do_restore = 1; /* Assume video contents were lost */ + + /* Not all BIOSes are clean with the top bit */ + new_mode = oreg.al & 0x7f; + + if (new_mode == mode) + return 0; /* Mode change OK */ + +#ifndef _WAKEUP + if (new_mode != boot_params.screen_info.orig_video_mode) { + /* Mode setting failed, but we didn't end up where we + started. That's bad. Try to revert to the original + video mode. */ + ireg.ax = boot_params.screen_info.orig_video_mode; + intcall(0x10, &ireg, NULL); + } +#endif + return -1; +} + +static int bios_probe(void) +{ + u8 mode; +#ifdef _WAKEUP + u8 saved_mode = 0x03; +#else + u8 saved_mode = boot_params.screen_info.orig_video_mode; +#endif + u16 crtc; + struct mode_info *mi; + int nmodes = 0; + + if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA) + return 0; + + set_fs(0); + crtc = vga_crtc(); + + video_bios.modes = GET_HEAP(struct mode_info, 0); + + for (mode = 0x14; mode <= 0x7f; mode++) { + if (!heap_free(sizeof(struct mode_info))) + break; + + if (mode_defined(VIDEO_FIRST_BIOS+mode)) + continue; + + if (set_bios_mode(mode)) + continue; + + /* Try to verify that it's a text mode. */ + + /* Attribute Controller: make graphics controller disabled */ + if (in_idx(0x3c0, 0x10) & 0x01) + continue; + + /* Graphics Controller: verify Alpha addressing enabled */ + if (in_idx(0x3ce, 0x06) & 0x01) + continue; + + /* CRTC cursor location low should be zero(?) */ + if (in_idx(crtc, 0x0f)) + continue; + + mi = GET_HEAP(struct mode_info, 1); + mi->mode = VIDEO_FIRST_BIOS+mode; + mi->depth = 0; /* text */ + mi->x = rdfs16(0x44a); + mi->y = rdfs8(0x484)+1; + nmodes++; + } + + set_bios_mode(saved_mode); + + return nmodes; +} + +static __videocard video_bios = +{ + .card_name = "BIOS", + .probe = bios_probe, + .set_mode = bios_set_mode, + .unsafe = 1, + .xmode_first = VIDEO_FIRST_BIOS, + .xmode_n = 0x80, +}; diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c new file mode 100644 index 000000000..9ada55dc1 --- /dev/null +++ b/arch/x86/boot/video-mode.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video-mode.c + * + * Set the video mode. This is separated out into a different + * file in order to be shared with the ACPI wakeup code. + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +#include + +/* + * Common variables + */ +int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ +int force_x, force_y; /* Don't query the BIOS for cols/rows */ +int do_restore; /* Screen contents changed during mode flip */ +int graphic_mode; /* Graphic mode with linear frame buffer */ + +/* Probe the video drivers and have them generate their mode lists. */ +void probe_cards(int unsafe) +{ + struct card_info *card; + static u8 probed[2]; + + if (probed[unsafe]) + return; + + probed[unsafe] = 1; + + for (card = video_cards; card < video_cards_end; card++) { + if (card->unsafe == unsafe) { + if (card->probe) + card->nmodes = card->probe(); + else + card->nmodes = 0; + } + } +} + +/* Test if a mode is defined */ +int mode_defined(u16 mode) +{ + struct card_info *card; + struct mode_info *mi; + int i; + + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + if (mi->mode == mode) + return 1; + } + } + + return 0; +} + +/* Set mode (without recalc) */ +static int raw_set_mode(u16 mode, u16 *real_mode) +{ + int nmode, i; + struct card_info *card; + struct mode_info *mi; + + /* Drop the recalc bit if set */ + mode &= ~VIDEO_RECALC; + + /* Scan for mode based on fixed ID, position, or resolution */ + nmode = 0; + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + int visible = mi->x || mi->y; + + if ((mode == nmode && visible) || + mode == mi->mode || + mode == (mi->y << 8)+mi->x) { + *real_mode = mi->mode; + return card->set_mode(mi); + } + + if (visible) + nmode++; + } + } + + /* Nothing found? Is it an "exceptional" (unprobed) mode? */ + for (card = video_cards; card < video_cards_end; card++) { + if (mode >= card->xmode_first && + mode < card->xmode_first+card->xmode_n) { + struct mode_info mix; + *real_mode = mix.mode = mode; + mix.x = mix.y = 0; + return card->set_mode(&mix); + } + } + + /* Otherwise, failure... */ + return -1; +} + +/* + * Recalculate the vertical video cutoff (hack!) + */ +static void vga_recalc_vertical(void) +{ + unsigned int font_size, rows; + u16 crtc; + u8 pt, ov; + + set_fs(0); + font_size = rdfs8(0x485); /* BIOS: font size (pixels) */ + rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */ + + rows *= font_size; /* Visible scan lines */ + rows--; /* ... minus one */ + + crtc = vga_crtc(); + + pt = in_idx(crtc, 0x11); + pt &= ~0x80; /* Unlock CR0-7 */ + out_idx(pt, crtc, 0x11); + + out_idx((u8)rows, crtc, 0x12); /* Lower height register */ + + ov = in_idx(crtc, 0x07); /* Overflow register */ + ov &= 0xbd; + ov |= (rows >> (8-1)) & 0x02; + ov |= (rows >> (9-6)) & 0x40; + out_idx(ov, crtc, 0x07); +} + +/* Set mode (with recalc if specified) */ +int set_mode(u16 mode) +{ + int rv; + u16 real_mode; + + /* Very special mode numbers... */ + if (mode == VIDEO_CURRENT_MODE) + return 0; /* Nothing to do... */ + else if (mode == NORMAL_VGA) + mode = VIDEO_80x25; + else if (mode == EXTENDED_VGA) + mode = VIDEO_8POINT; + + rv = raw_set_mode(mode, &real_mode); + if (rv) + return rv; + + if (mode & VIDEO_RECALC) + vga_recalc_vertical(); + + /* Save the canonical mode number for the kernel, not + an alias, size specification or menu position */ +#ifndef _WAKEUP + boot_params.hdr.vid_mode = real_mode; +#endif + return 0; +} diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c new file mode 100644 index 000000000..c2c6d35e3 --- /dev/null +++ b/arch/x86/boot/video-vesa.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * VESA text modes + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" +#include "string.h" + +/* VESA information */ +static struct vesa_general_info vginfo; +static struct vesa_mode_info vminfo; + +static __videocard video_vesa; + +#ifndef _WAKEUP +static void vesa_store_mode_params_graphics(void); +#else /* _WAKEUP */ +static inline void vesa_store_mode_params_graphics(void) {} +#endif /* _WAKEUP */ + +static int vesa_probe(void) +{ + struct biosregs ireg, oreg; + u16 mode; + addr_t mode_ptr; + struct mode_info *mi; + int nmodes = 0; + + video_vesa.modes = GET_HEAP(struct mode_info, 0); + + initregs(&ireg); + ireg.ax = 0x4f00; + ireg.di = (size_t)&vginfo; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f || + vginfo.signature != VESA_MAGIC || + vginfo.version < 0x0102) + return 0; /* Not present */ + + set_fs(vginfo.video_mode_ptr.seg); + mode_ptr = vginfo.video_mode_ptr.off; + + while ((mode = rdfs16(mode_ptr)) != 0xffff) { + mode_ptr += 2; + + if (!heap_free(sizeof(struct mode_info))) + break; /* Heap full, can't save mode info */ + + if (mode & ~0x1ff) + continue; + + memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */ + + ireg.ax = 0x4f01; + ireg.cx = mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + continue; + + if ((vminfo.mode_attr & 0x15) == 0x05) { + /* Text Mode, TTY BIOS supported, + supported by hardware */ + mi = GET_HEAP(struct mode_info, 1); + mi->mode = mode + VIDEO_FIRST_VESA; + mi->depth = 0; /* text */ + mi->x = vminfo.h_res; + mi->y = vminfo.v_res; + nmodes++; + } else if ((vminfo.mode_attr & 0x99) == 0x99 && + (vminfo.memory_layout == 4 || + vminfo.memory_layout == 6) && + vminfo.memory_planes == 1) { +#ifdef CONFIG_BOOT_VESA_SUPPORT + /* Graphics mode, color, linear frame buffer + supported. Only register the mode if + if framebuffer is configured, however, + otherwise the user will be left without a screen. */ + mi = GET_HEAP(struct mode_info, 1); + mi->mode = mode + VIDEO_FIRST_VESA; + mi->depth = vminfo.bpp; + mi->x = vminfo.h_res; + mi->y = vminfo.v_res; + nmodes++; +#endif + } + } + + return nmodes; +} + +static int vesa_set_mode(struct mode_info *mode) +{ + struct biosregs ireg, oreg; + int is_graphic; + u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; + + memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */ + + initregs(&ireg); + ireg.ax = 0x4f01; + ireg.cx = vesa_mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return -1; + + if ((vminfo.mode_attr & 0x15) == 0x05) { + /* It's a supported text mode */ + is_graphic = 0; +#ifdef CONFIG_BOOT_VESA_SUPPORT + } else if ((vminfo.mode_attr & 0x99) == 0x99) { + /* It's a graphics mode with linear frame buffer */ + is_graphic = 1; + vesa_mode |= 0x4000; /* Request linear frame buffer */ +#endif + } else { + return -1; /* Invalid mode */ + } + + + initregs(&ireg); + ireg.ax = 0x4f02; + ireg.bx = vesa_mode; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return -1; + + graphic_mode = is_graphic; + if (!is_graphic) { + /* Text mode */ + force_x = mode->x; + force_y = mode->y; + do_restore = 1; + } else { + /* Graphics mode */ + vesa_store_mode_params_graphics(); + } + + return 0; +} + + +#ifndef _WAKEUP + +/* Switch DAC to 8-bit mode */ +static void vesa_dac_set_8bits(void) +{ + struct biosregs ireg, oreg; + u8 dac_size = 6; + + /* If possible, switch the DAC to 8-bit mode */ + if (vginfo.capabilities & 1) { + initregs(&ireg); + ireg.ax = 0x4f08; + ireg.bh = 0x08; + intcall(0x10, &ireg, &oreg); + if (oreg.ax == 0x004f) + dac_size = oreg.bh; + } + + /* Set the color sizes to the DAC size, and offsets to 0 */ + boot_params.screen_info.red_size = dac_size; + boot_params.screen_info.green_size = dac_size; + boot_params.screen_info.blue_size = dac_size; + boot_params.screen_info.rsvd_size = dac_size; + + boot_params.screen_info.red_pos = 0; + boot_params.screen_info.green_pos = 0; + boot_params.screen_info.blue_pos = 0; + boot_params.screen_info.rsvd_pos = 0; +} + +/* Save the VESA protected mode info */ +static void vesa_store_pm_info(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0x4f0a; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return; + + boot_params.screen_info.vesapm_seg = oreg.es; + boot_params.screen_info.vesapm_off = oreg.di; +} + +/* + * Save video mode parameters for graphics mode + */ +static void vesa_store_mode_params_graphics(void) +{ + /* Tell the kernel we're in VESA graphics mode */ + boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB; + + /* Mode parameters */ + boot_params.screen_info.vesa_attributes = vminfo.mode_attr; + boot_params.screen_info.lfb_linelength = vminfo.logical_scan; + boot_params.screen_info.lfb_width = vminfo.h_res; + boot_params.screen_info.lfb_height = vminfo.v_res; + boot_params.screen_info.lfb_depth = vminfo.bpp; + boot_params.screen_info.pages = vminfo.image_planes; + boot_params.screen_info.lfb_base = vminfo.lfb_ptr; + memcpy(&boot_params.screen_info.red_size, + &vminfo.rmask, 8); + + /* General parameters */ + boot_params.screen_info.lfb_size = vginfo.total_memory; + + if (vminfo.bpp <= 8) + vesa_dac_set_8bits(); + + vesa_store_pm_info(); +} + +/* + * Save EDID information for the kernel; this is invoked, separately, + * after mode-setting. + */ +void vesa_store_edid(void) +{ +#ifdef CONFIG_FIRMWARE_EDID + struct biosregs ireg, oreg; + + /* Apparently used as a nonsense token... */ + memset(&boot_params.edid_info, 0x13, sizeof(boot_params.edid_info)); + + if (vginfo.version < 0x0200) + return; /* EDID requires VBE 2.0+ */ + + initregs(&ireg); + ireg.ax = 0x4f15; /* VBE DDC */ + /* ireg.bx = 0x0000; */ /* Report DDC capabilities */ + /* ireg.cx = 0; */ /* Controller 0 */ + ireg.es = 0; /* ES:DI must be 0 by spec */ + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return; /* No EDID */ + + /* BH = time in seconds to transfer EDD information */ + /* BL = DDC level supported */ + + ireg.ax = 0x4f15; /* VBE DDC */ + ireg.bx = 0x0001; /* Read EDID */ + /* ireg.cx = 0; */ /* Controller 0 */ + /* ireg.dx = 0; */ /* EDID block number */ + ireg.es = ds(); + ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */ + intcall(0x10, &ireg, &oreg); +#endif /* CONFIG_FIRMWARE_EDID */ +} + +#endif /* not _WAKEUP */ + +static __videocard video_vesa = +{ + .card_name = "VESA", + .probe = vesa_probe, + .set_mode = vesa_set_mode, + .xmode_first = VIDEO_FIRST_VESA, + .xmode_n = 0x200, +}; diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c new file mode 100644 index 000000000..4816cb9cf --- /dev/null +++ b/arch/x86/boot/video-vga.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Common all-VGA modes + */ + +#include "boot.h" +#include "video.h" + +static struct mode_info vga_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, + { VIDEO_8POINT, 80, 50, 0 }, + { VIDEO_80x43, 80, 43, 0 }, + { VIDEO_80x28, 80, 28, 0 }, + { VIDEO_80x30, 80, 30, 0 }, + { VIDEO_80x34, 80, 34, 0 }, + { VIDEO_80x60, 80, 60, 0 }, +}; + +static struct mode_info ega_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, + { VIDEO_8POINT, 80, 43, 0 }, +}; + +static struct mode_info cga_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, +}; + +static __videocard video_vga; + +/* Set basic 80x25 mode */ +static u8 vga_set_basic_mode(void) +{ + struct biosregs ireg, oreg; + u8 mode; + + initregs(&ireg); + + /* Query current mode */ + ireg.ax = 0x0f00; + intcall(0x10, &ireg, &oreg); + mode = oreg.al; + + if (mode != 3 && mode != 7) + mode = 3; + + /* Set the mode */ + ireg.ax = mode; /* AH=0: set mode */ + intcall(0x10, &ireg, NULL); + do_restore = 1; + return mode; +} + +static void vga_set_8font(void) +{ + /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 8x8 font */ + ireg.ax = 0x1112; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); + + /* Use alternate print screen */ + ireg.ax = 0x1200; + ireg.bl = 0x20; + intcall(0x10, &ireg, NULL); + + /* Turn off cursor emulation */ + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); + + /* Cursor is scan lines 6-7 */ + ireg.ax = 0x0100; + ireg.cx = 0x0607; + intcall(0x10, &ireg, NULL); +} + +static void vga_set_14font(void) +{ + /* Set 9x14 font - 80x28 on VGA */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 9x14 font */ + ireg.ax = 0x1111; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); + + /* Turn off cursor emulation */ + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); + + /* Cursor is scan lines 11-12 */ + ireg.ax = 0x0100; + ireg.cx = 0x0b0c; + intcall(0x10, &ireg, NULL); +} + +static void vga_set_80x43(void) +{ + /* Set 80x43 mode on VGA (not EGA) */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 350 scans */ + ireg.ax = 0x1201; + ireg.bl = 0x30; + intcall(0x10, &ireg, NULL); + + /* Reset video mode */ + ireg.ax = 0x0003; + intcall(0x10, &ireg, NULL); + + vga_set_8font(); +} + +/* I/O address of the VGA CRTC */ +u16 vga_crtc(void) +{ + return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; +} + +static void vga_set_480_scanlines(void) +{ + u16 crtc; /* CRTC base address */ + u8 csel; /* CRTC miscellaneous output register */ + + crtc = vga_crtc(); + + out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ + out_idx(0x0b, crtc, 0x06); /* Vertical total */ + out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ + out_idx(0xea, crtc, 0x10); /* Vertical sync start */ + out_idx(0xdf, crtc, 0x12); /* Vertical display end */ + out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ + out_idx(0x04, crtc, 0x16); /* Vertical blank end */ + csel = inb(0x3cc); + csel &= 0x0d; + csel |= 0xe2; + outb(csel, 0x3c2); +} + +static void vga_set_vertical_end(int lines) +{ + u16 crtc; /* CRTC base address */ + u8 ovfw; /* CRTC overflow register */ + int end = lines-1; + + crtc = vga_crtc(); + + ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40); + + out_idx(ovfw, crtc, 0x07); /* Vertical overflow */ + out_idx(end, crtc, 0x12); /* Vertical display end */ +} + +static void vga_set_80x30(void) +{ + vga_set_480_scanlines(); + vga_set_vertical_end(30*16); +} + +static void vga_set_80x34(void) +{ + vga_set_480_scanlines(); + vga_set_14font(); + vga_set_vertical_end(34*14); +} + +static void vga_set_80x60(void) +{ + vga_set_480_scanlines(); + vga_set_8font(); + vga_set_vertical_end(60*8); +} + +static int vga_set_mode(struct mode_info *mode) +{ + /* Set the basic mode */ + vga_set_basic_mode(); + + /* Override a possibly broken BIOS */ + force_x = mode->x; + force_y = mode->y; + + switch (mode->mode) { + case VIDEO_80x25: + break; + case VIDEO_8POINT: + vga_set_8font(); + break; + case VIDEO_80x43: + vga_set_80x43(); + break; + case VIDEO_80x28: + vga_set_14font(); + break; + case VIDEO_80x30: + vga_set_80x30(); + break; + case VIDEO_80x34: + vga_set_80x34(); + break; + case VIDEO_80x60: + vga_set_80x60(); + break; + } + + return 0; +} + +/* + * Note: this probe includes basic information required by all + * systems. It should be executed first, by making sure + * video-vga.c is listed first in the Makefile. + */ +static int vga_probe(void) +{ + static const char *card_name[] = { + "CGA/MDA/HGC", "EGA", "VGA" + }; + static struct mode_info *mode_lists[] = { + cga_modes, + ega_modes, + vga_modes, + }; + static int mode_count[] = { + ARRAY_SIZE(cga_modes), + ARRAY_SIZE(ega_modes), + ARRAY_SIZE(vga_modes), + }; + + struct biosregs ireg, oreg; + + initregs(&ireg); + + ireg.ax = 0x1200; + ireg.bl = 0x10; /* Check EGA/VGA */ + intcall(0x10, &ireg, &oreg); + +#ifndef _WAKEUP + boot_params.screen_info.orig_video_ega_bx = oreg.bx; +#endif + + /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ + if (oreg.bl != 0x10) { + /* EGA/VGA */ + ireg.ax = 0x1a00; + intcall(0x10, &ireg, &oreg); + + if (oreg.al == 0x1a) { + adapter = ADAPTER_VGA; +#ifndef _WAKEUP + boot_params.screen_info.orig_video_isVGA = 1; +#endif + } else { + adapter = ADAPTER_EGA; + } + } else { + adapter = ADAPTER_CGA; + } + + video_vga.modes = mode_lists[adapter]; + video_vga.card_name = card_name[adapter]; + return mode_count[adapter]; +} + +static __videocard video_vga = { + .card_name = "VGA", + .probe = vga_probe, + .set_mode = vga_set_mode, +}; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c new file mode 100644 index 000000000..f2e96905b --- /dev/null +++ b/arch/x86/boot/video.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Select video mode + */ + +#include + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +static u16 video_segment; + +static void store_cursor_position(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x03; + intcall(0x10, &ireg, &oreg); + + boot_params.screen_info.orig_x = oreg.dl; + boot_params.screen_info.orig_y = oreg.dh; + + if (oreg.ch & 0x20) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; + + if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; +} + +static void store_video_mode(void) +{ + struct biosregs ireg, oreg; + + /* N.B.: the saving of the video page here is a bit silly, + since we pretty much assume page 0 everywhere. */ + initregs(&ireg); + ireg.ah = 0x0f; + intcall(0x10, &ireg, &oreg); + + /* Not all BIOSes are clean with respect to the top bit */ + boot_params.screen_info.orig_video_mode = oreg.al & 0x7f; + boot_params.screen_info.orig_video_page = oreg.bh; +} + +/* + * Store the video mode parameters for later usage by the kernel. + * This is done by asking the BIOS except for the rows/columns + * parameters in the default 80x25 mode -- these are set directly, + * because some very obscure BIOSes supply insane values. + */ +static void store_mode_params(void) +{ + u16 font_size; + int x, y; + + /* For graphics mode, it is up to the mode-setting driver + (currently only video-vesa.c) to store the parameters */ + if (graphic_mode) + return; + + store_cursor_position(); + store_video_mode(); + + if (boot_params.screen_info.orig_video_mode == 0x07) { + /* MDA, HGC, or VGA in monochrome mode */ + video_segment = 0xb000; + } else { + /* CGA, EGA, VGA and so forth */ + video_segment = 0xb800; + } + + set_fs(0); + font_size = rdfs16(0x485); /* Font size, BIOS area */ + boot_params.screen_info.orig_video_points = font_size; + + x = rdfs16(0x44a); + y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1; + + if (force_x) + x = force_x; + if (force_y) + y = force_y; + + boot_params.screen_info.orig_video_cols = x; + boot_params.screen_info.orig_video_lines = y; +} + +static unsigned int get_entry(void) +{ + char entry_buf[4]; + int i, len = 0; + int key; + unsigned int v; + + do { + key = getchar(); + + if (key == '\b') { + if (len > 0) { + puts("\b \b"); + len--; + } + } else if ((key >= '0' && key <= '9') || + (key >= 'A' && key <= 'Z') || + (key >= 'a' && key <= 'z')) { + if (len < sizeof(entry_buf)) { + entry_buf[len++] = key; + putchar(key); + } + } + } while (key != '\r'); + putchar('\n'); + + if (len == 0) + return VIDEO_CURRENT_MODE; /* Default */ + + v = 0; + for (i = 0; i < len; i++) { + v <<= 4; + key = entry_buf[i] | 0x20; + v += (key > '9') ? key-'a'+10 : key-'0'; + } + + return v; +} + +static void display_menu(void) +{ + struct card_info *card; + struct mode_info *mi; + char ch; + int i; + int nmodes; + int modes_per_line; + int col; + + nmodes = 0; + for (card = video_cards; card < video_cards_end; card++) + nmodes += card->nmodes; + + modes_per_line = 1; + if (nmodes >= 20) + modes_per_line = 3; + + for (col = 0; col < modes_per_line; col++) + puts("Mode: Resolution: Type: "); + putchar('\n'); + + col = 0; + ch = '0'; + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + char resbuf[32]; + int visible = mi->x && mi->y; + u16 mode_id = mi->mode ? mi->mode : + (mi->y << 8)+mi->x; + + if (!visible) + continue; /* Hidden mode */ + + if (mi->depth) + sprintf(resbuf, "%dx%d", mi->y, mi->depth); + else + sprintf(resbuf, "%d", mi->y); + + printf("%c %03X %4dx%-7s %-6s", + ch, mode_id, mi->x, resbuf, card->card_name); + col++; + if (col >= modes_per_line) { + putchar('\n'); + col = 0; + } + + if (ch == '9') + ch = 'a'; + else if (ch == 'z' || ch == ' ') + ch = ' '; /* Out of keys... */ + else + ch++; + } + } + if (col) + putchar('\n'); +} + +#define H(x) ((x)-'a'+10) +#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n')) + +static unsigned int mode_menu(void) +{ + int key; + unsigned int sel; + + puts("Press to see video modes available, " + " to continue, or wait 30 sec\n"); + + kbd_flush(); + while (1) { + key = getchar_timeout(); + if (key == ' ' || key == 0) + return VIDEO_CURRENT_MODE; /* Default */ + if (key == '\r') + break; + putchar('\a'); /* Beep! */ + } + + + for (;;) { + display_menu(); + + puts("Enter a video mode or \"scan\" to scan for " + "additional modes: "); + sel = get_entry(); + if (sel != SCAN) + return sel; + + probe_cards(1); + } +} + +/* Save screen content to the heap */ +static struct saved_screen { + int x, y; + int curx, cury; + u16 *data; +} saved; + +static void save_screen(void) +{ + /* Should be called after store_mode_params() */ + saved.x = boot_params.screen_info.orig_video_cols; + saved.y = boot_params.screen_info.orig_video_lines; + saved.curx = boot_params.screen_info.orig_x; + saved.cury = boot_params.screen_info.orig_y; + + if (!heap_free(saved.x*saved.y*sizeof(u16)+512)) + return; /* Not enough heap to save the screen */ + + saved.data = GET_HEAP(u16, saved.x*saved.y); + + set_fs(video_segment); + copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16)); +} + +static void restore_screen(void) +{ + /* Should be called after store_mode_params() */ + int xs = boot_params.screen_info.orig_video_cols; + int ys = boot_params.screen_info.orig_video_lines; + int y; + addr_t dst = 0; + u16 *src = saved.data; + struct biosregs ireg; + + if (graphic_mode) + return; /* Can't restore onto a graphic mode */ + + if (!src) + return; /* No saved screen contents */ + + /* Restore screen contents */ + + set_fs(video_segment); + for (y = 0; y < ys; y++) { + int npad; + + if (y < saved.y) { + int copy = (xs < saved.x) ? xs : saved.x; + copy_to_fs(dst, src, copy*sizeof(u16)); + dst += copy*sizeof(u16); + src += saved.x; + npad = (xs < saved.x) ? 0 : xs-saved.x; + } else { + npad = xs; + } + + /* Writes "npad" blank characters to + video_segment:dst and advances dst */ + asm volatile("pushw %%es ; " + "movw %2,%%es ; " + "shrw %%cx ; " + "jnc 1f ; " + "stosw \n\t" + "1: rep;stosl ; " + "popw %%es" + : "+D" (dst), "+c" (npad) + : "bdS" (video_segment), + "a" (0x07200720)); + } + + /* Restore cursor position */ + if (saved.curx >= xs) + saved.curx = xs-1; + if (saved.cury >= ys) + saved.cury = ys-1; + + initregs(&ireg); + ireg.ah = 0x02; /* Set cursor position */ + ireg.dh = saved.cury; + ireg.dl = saved.curx; + intcall(0x10, &ireg, NULL); + + store_cursor_position(); +} + +void set_video(void) +{ + u16 mode = boot_params.hdr.vid_mode; + + RESET_HEAP(); + + store_mode_params(); + save_screen(); + probe_cards(0); + + for (;;) { + if (mode == ASK_VGA) + mode = mode_menu(); + + if (!set_mode(mode)) + break; + + printf("Undefined video mode number: %x\n", mode); + mode = ASK_VGA; + } + boot_params.hdr.vid_mode = mode; + vesa_store_edid(); + store_mode_params(); + + if (do_restore) + restore_screen(); +} diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h new file mode 100644 index 000000000..04bde0bb2 --- /dev/null +++ b/arch/x86/boot/video.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * ----------------------------------------------------------------------- */ + +/* + * Header file for the real-mode video probing code + */ + +#ifndef BOOT_VIDEO_H +#define BOOT_VIDEO_H + +#include + +/* + * This code uses an extended set of video mode numbers. These include: + * Aliases for standard modes + * NORMAL_VGA (-1) + * EXTENDED_VGA (-2) + * ASK_VGA (-3) + * Video modes numbered by menu position -- NOT RECOMMENDED because of lack + * of compatibility when extending the table. These are between 0x00 and 0xff. + */ +#define VIDEO_FIRST_MENU 0x0000 + +/* Standard BIOS video modes (BIOS number + 0x0100) */ +#define VIDEO_FIRST_BIOS 0x0100 + +/* VESA BIOS video modes (VESA number + 0x0200) */ +#define VIDEO_FIRST_VESA 0x0200 + +/* Video7 special modes (BIOS number + 0x0900) */ +#define VIDEO_FIRST_V7 0x0900 + +/* Special video modes */ +#define VIDEO_FIRST_SPECIAL 0x0f00 +#define VIDEO_80x25 0x0f00 +#define VIDEO_8POINT 0x0f01 +#define VIDEO_80x43 0x0f02 +#define VIDEO_80x28 0x0f03 +#define VIDEO_CURRENT_MODE 0x0f04 +#define VIDEO_80x30 0x0f05 +#define VIDEO_80x34 0x0f06 +#define VIDEO_80x60 0x0f07 +#define VIDEO_GFX_HACK 0x0f08 +#define VIDEO_LAST_SPECIAL 0x0f09 + +/* Video modes given by resolution */ +#define VIDEO_FIRST_RESOLUTION 0x1000 + +/* The "recalculate timings" flag */ +#define VIDEO_RECALC 0x8000 + +void store_screen(void); +#define DO_STORE() store_screen() + +/* + * Mode table structures + */ + +struct mode_info { + u16 mode; /* Mode number (vga= style) */ + u16 x, y; /* Width, height */ + u16 depth; /* Bits per pixel, 0 for text mode */ +}; + +struct card_info { + const char *card_name; + int (*set_mode)(struct mode_info *mode); + int (*probe)(void); + struct mode_info *modes; + int nmodes; /* Number of probed modes so far */ + int unsafe; /* Probing is unsafe, only do after "scan" */ + u16 xmode_first; /* Unprobed modes to try to call anyway */ + u16 xmode_n; /* Size of unprobed mode range */ +}; + +#define __videocard struct card_info __section(".videocards") __attribute__((used)) +extern struct card_info video_cards[], video_cards_end[]; + +int mode_defined(u16 mode); /* video.c */ + +/* Basic video information */ +#define ADAPTER_CGA 0 /* CGA/MDA/HGC */ +#define ADAPTER_EGA 1 +#define ADAPTER_VGA 2 + +extern int adapter; +extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ +extern int do_restore; /* Restore screen contents */ +extern int graphic_mode; /* Graphics mode with linear frame buffer */ + +/* Accessing VGA indexed registers */ +static inline u8 in_idx(u16 port, u8 index) +{ + outb(index, port); + return inb(port+1); +} + +static inline void out_idx(u8 v, u16 port, u8 index) +{ + outw(index+(v << 8), port); +} + +/* Writes a value to an indexed port and then reads the port again */ +static inline u8 tst_idx(u8 v, u16 port, u8 index) +{ + out_idx(port, index, v); + return in_idx(port, index); +} + +/* Get the I/O port of the VGA CRTC */ +u16 vga_crtc(void); /* video-vga.c */ + +#endif /* BOOT_VIDEO_H */ diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile new file mode 100644 index 000000000..c816acf78 --- /dev/null +++ b/arch/x86/coco/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS_REMOVE_core.o = -pg +KASAN_SANITIZE_core.o := n +CFLAGS_core.o += -fno-stack-protector + +obj-y += core.o + +obj-$(CONFIG_INTEL_TDX_GUEST) += tdx/ diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c new file mode 100644 index 000000000..49b44f881 --- /dev/null +++ b/arch/x86/coco/core.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#include +#include + +#include +#include + +static enum cc_vendor vendor __ro_after_init; +static u64 cc_mask __ro_after_init; + +static bool intel_cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_UNROLL_STRING_IO: + case CC_ATTR_HOTPLUG_DISABLED: + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_MEM_ENCRYPT: + return true; + default: + return false; + } +} + +/* + * SME and SEV are very similar but they are not the same, so there are + * times that the kernel will need to distinguish between SME and SEV. The + * cc_platform_has() function is used for this. When a distinction isn't + * needed, the CC_ATTR_MEM_ENCRYPT attribute can be used. + * + * The trampoline code is a good example for this requirement. Before + * paging is activated, SME will access all memory as decrypted, but SEV + * will access all memory as encrypted. So, when APs are being brought + * up under SME the trampoline area cannot be encrypted, whereas under SEV + * the trampoline area must be encrypted. + */ +static bool amd_cc_platform_has(enum cc_attr attr) +{ +#ifdef CONFIG_AMD_MEM_ENCRYPT + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return sme_me_mask; + + case CC_ATTR_HOST_MEM_ENCRYPT: + return sme_me_mask && !(sev_status & MSR_AMD64_SEV_ENABLED); + + case CC_ATTR_GUEST_MEM_ENCRYPT: + return sev_status & MSR_AMD64_SEV_ENABLED; + + case CC_ATTR_GUEST_STATE_ENCRYPT: + return sev_status & MSR_AMD64_SEV_ES_ENABLED; + + /* + * With SEV, the rep string I/O instructions need to be unrolled + * but SEV-ES supports them through the #VC handler. + */ + case CC_ATTR_GUEST_UNROLL_STRING_IO: + return (sev_status & MSR_AMD64_SEV_ENABLED) && + !(sev_status & MSR_AMD64_SEV_ES_ENABLED); + + case CC_ATTR_GUEST_SEV_SNP: + return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + + default: + return false; + } +#else + return false; +#endif +} + +static bool hyperv_cc_platform_has(enum cc_attr attr) +{ + return attr == CC_ATTR_GUEST_MEM_ENCRYPT; +} + +bool cc_platform_has(enum cc_attr attr) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return amd_cc_platform_has(attr); + case CC_VENDOR_INTEL: + return intel_cc_platform_has(attr); + case CC_VENDOR_HYPERV: + return hyperv_cc_platform_has(attr); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(cc_platform_has); + +u64 cc_mkenc(u64 val) +{ + /* + * Both AMD and Intel use a bit in the page table to indicate + * encryption status of the page. + * + * - for AMD, bit *set* means the page is encrypted + * - for Intel *clear* means encrypted. + */ + switch (vendor) { + case CC_VENDOR_AMD: + return val | cc_mask; + case CC_VENDOR_INTEL: + return val & ~cc_mask; + default: + return val; + } +} + +u64 cc_mkdec(u64 val) +{ + /* See comment in cc_mkenc() */ + switch (vendor) { + case CC_VENDOR_AMD: + return val & ~cc_mask; + case CC_VENDOR_INTEL: + return val | cc_mask; + default: + return val; + } +} +EXPORT_SYMBOL_GPL(cc_mkdec); + +__init void cc_set_vendor(enum cc_vendor v) +{ + vendor = v; +} + +__init void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile new file mode 100644 index 000000000..46c559985 --- /dev/null +++ b/arch/x86/coco/tdx/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += tdx.o tdcall.o diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S new file mode 100644 index 000000000..f9eb1134f --- /dev/null +++ b/arch/x86/coco/tdx/tdcall.S @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include + +#include +#include +#include + +#include "../../virt/vmx/tdx/tdxcall.S" + +/* + * Bitmasks of exposed registers (with VMM). + */ +#define TDX_R10 BIT(10) +#define TDX_R11 BIT(11) +#define TDX_R12 BIT(12) +#define TDX_R13 BIT(13) +#define TDX_R14 BIT(14) +#define TDX_R15 BIT(15) + +/* + * These registers are clobbered to hold arguments for each + * TDVMCALL. They are safe to expose to the VMM. + * Each bit in this mask represents a register ID. Bit field + * details can be found in TDX GHCI specification, section + * titled "TDCALL [TDG.VP.VMCALL] leaf". + */ +#define TDVMCALL_EXPOSE_REGS_MASK ( TDX_R10 | TDX_R11 | \ + TDX_R12 | TDX_R13 | \ + TDX_R14 | TDX_R15 ) + +/* + * __tdx_module_call() - Used by TDX guests to request services from + * the TDX module (does not include VMM services) using TDCALL instruction. + * + * Transforms function call register arguments into the TDCALL register ABI. + * After TDCALL operation, TDX module output is saved in @out (if it is + * provided by the user). + * + *------------------------------------------------------------------------- + * TDCALL ABI: + *------------------------------------------------------------------------- + * Input Registers: + * + * RAX - TDCALL Leaf number. + * RCX,RDX,R8-R9 - TDCALL Leaf specific input registers. + * + * Output Registers: + * + * RAX - TDCALL instruction error code. + * RCX,RDX,R8-R11 - TDCALL Leaf specific output registers. + * + *------------------------------------------------------------------------- + * + * __tdx_module_call() function ABI: + * + * @fn (RDI) - TDCALL Leaf ID, moved to RAX + * @rcx (RSI) - Input parameter 1, moved to RCX + * @rdx (RDX) - Input parameter 2, moved to RDX + * @r8 (RCX) - Input parameter 3, moved to R8 + * @r9 (R8) - Input parameter 4, moved to R9 + * + * @out (R9) - struct tdx_module_output pointer + * stored temporarily in R12 (not + * shared with the TDX module). It + * can be NULL. + * + * Return status of TDCALL via RAX. + */ +SYM_FUNC_START(__tdx_module_call) + FRAME_BEGIN + TDX_MODULE_CALL host=0 + FRAME_END + RET +SYM_FUNC_END(__tdx_module_call) + +/* + * __tdx_hypercall() - Make hypercalls to a TDX VMM using TDVMCALL leaf + * of TDCALL instruction + * + * Transforms values in function call argument struct tdx_hypercall_args @args + * into the TDCALL register ABI. After TDCALL operation, VMM output is saved + * back in @args. + * + *------------------------------------------------------------------------- + * TD VMCALL ABI: + *------------------------------------------------------------------------- + * + * Input Registers: + * + * RAX - TDCALL instruction leaf number (0 - TDG.VP.VMCALL) + * RCX - BITMAP which controls which part of TD Guest GPR + * is passed as-is to the VMM and back. + * R10 - Set 0 to indicate TDCALL follows standard TDX ABI + * specification. Non zero value indicates vendor + * specific ABI. + * R11 - VMCALL sub function number + * RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific arguments. + * R8-R9, R12-R15 - Same as above. + * + * Output Registers: + * + * RAX - TDCALL instruction status (Not related to hypercall + * output). + * R10 - Hypercall output error code. + * R11-R15 - Hypercall sub function specific output values. + * + *------------------------------------------------------------------------- + * + * __tdx_hypercall() function ABI: + * + * @args (RDI) - struct tdx_hypercall_args for input and output + * @flags (RSI) - TDX_HCALL_* flags + * + * On successful completion, return the hypercall error code. + */ +SYM_FUNC_START(__tdx_hypercall) + FRAME_BEGIN + + /* Save callee-saved GPRs as mandated by the x86_64 ABI */ + push %r15 + push %r14 + push %r13 + push %r12 + + /* Mangle function call ABI into TDCALL ABI: */ + /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */ + xor %eax, %eax + + /* Copy hypercall registers from arg struct: */ + movq TDX_HYPERCALL_r10(%rdi), %r10 + movq TDX_HYPERCALL_r11(%rdi), %r11 + movq TDX_HYPERCALL_r12(%rdi), %r12 + movq TDX_HYPERCALL_r13(%rdi), %r13 + movq TDX_HYPERCALL_r14(%rdi), %r14 + movq TDX_HYPERCALL_r15(%rdi), %r15 + + movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx + + /* + * For the idle loop STI needs to be called directly before the TDCALL + * that enters idle (EXIT_REASON_HLT case). STI instruction enables + * interrupts only one instruction later. If there is a window between + * STI and the instruction that emulates the HALT state, there is a + * chance for interrupts to happen in this window, which can delay the + * HLT operation indefinitely. Since this is the not the desired + * result, conditionally call STI before TDCALL. + */ + testq $TDX_HCALL_ISSUE_STI, %rsi + jz .Lskip_sti + sti +.Lskip_sti: + tdcall + + /* + * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that + * something has gone horribly wrong with the TDX module. + * + * The return status of the hypercall operation is in a separate + * register (in R10). Hypercall errors are a part of normal operation + * and are handled by callers. + */ + testq %rax, %rax + jne .Lpanic + + /* TDVMCALL leaf return code is in R10 */ + movq %r10, %rax + + /* Copy hypercall result registers to arg struct if needed */ + testq $TDX_HCALL_HAS_OUTPUT, %rsi + jz .Lout + + movq %r10, TDX_HYPERCALL_r10(%rdi) + movq %r11, TDX_HYPERCALL_r11(%rdi) + movq %r12, TDX_HYPERCALL_r12(%rdi) + movq %r13, TDX_HYPERCALL_r13(%rdi) + movq %r14, TDX_HYPERCALL_r14(%rdi) + movq %r15, TDX_HYPERCALL_r15(%rdi) +.Lout: + /* + * Zero out registers exposed to the VMM to avoid speculative execution + * with VMM-controlled values. This needs to include all registers + * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15 + * context will be restored. + */ + xor %r10d, %r10d + xor %r11d, %r11d + + /* Restore callee-saved GPRs as mandated by the x86_64 ABI */ + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + FRAME_END + + RET +.Lpanic: + call __tdx_hypercall_failed + /* __tdx_hypercall_failed never returns */ + REACHABLE + jmp .Lpanic +SYM_FUNC_END(__tdx_hypercall) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c new file mode 100644 index 000000000..d0565a9e7 --- /dev/null +++ b/arch/x86/coco/tdx/tdx.c @@ -0,0 +1,834 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021-2022 Intel Corporation */ + +#undef pr_fmt +#define pr_fmt(fmt) "tdx: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +/* TDX module Call Leaf IDs */ +#define TDX_GET_INFO 1 +#define TDX_GET_VEINFO 3 +#define TDX_ACCEPT_PAGE 6 + +/* TDX hypercall Leaf IDs */ +#define TDVMCALL_MAP_GPA 0x10001 + +/* MMIO direction */ +#define EPT_READ 0 +#define EPT_WRITE 1 + +/* Port I/O direction */ +#define PORT_READ 0 +#define PORT_WRITE 1 + +/* See Exit Qualification for I/O Instructions in VMX documentation */ +#define VE_IS_IO_IN(e) ((e) & BIT(3)) +#define VE_GET_IO_SIZE(e) (((e) & GENMASK(2, 0)) + 1) +#define VE_GET_PORT_NUM(e) ((e) >> 16) +#define VE_IS_IO_STRING(e) ((e) & BIT(4)) + +#define ATTR_SEPT_VE_DISABLE BIT(28) + +/* + * Wrapper for standard use of __tdx_hypercall with no output aside from + * return code. + */ +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = fn, + .r12 = r12, + .r13 = r13, + .r14 = r14, + .r15 = r15, + }; + + return __tdx_hypercall(&args, 0); +} + +/* Called from __tdx_hypercall() for unrecoverable failure */ +void __tdx_hypercall_failed(void) +{ + panic("TDVMCALL failed. TDX module bug?"); +} + +/* + * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined + * independently from but are currently matched 1:1 with VMX EXIT_REASONs. + * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and + * guest sides of these calls. + */ +static u64 hcall_func(u64 exit_reason) +{ + return exit_reason; +} + +#ifdef CONFIG_KVM_GUEST +long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4) +{ + struct tdx_hypercall_args args = { + .r10 = nr, + .r11 = p1, + .r12 = p2, + .r13 = p3, + .r14 = p4, + }; + + return __tdx_hypercall(&args, 0); +} +EXPORT_SYMBOL_GPL(tdx_kvm_hypercall); +#endif + +/* + * Used for TDX guests to make calls directly to the TD module. This + * should only be used for calls that have no legitimate reason to fail + * or where the kernel can not survive the call failing. + */ +static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out) +{ + if (__tdx_module_call(fn, rcx, rdx, r8, r9, out)) + panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); +} + +static void tdx_parse_tdinfo(u64 *cc_mask) +{ + struct tdx_module_output out; + unsigned int gpa_width; + u64 td_attr; + + /* + * TDINFO TDX module call is used to get the TD execution environment + * information like GPA width, number of available vcpus, debug mode + * information, etc. More details about the ABI can be found in TDX + * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL + * [TDG.VP.INFO]. + */ + tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out); + + /* + * The highest bit of a guest physical address is the "sharing" bit. + * Set it for shared pages and clear it for private pages. + * + * The GPA width that comes out of this call is critical. TDX guests + * can not meaningfully run without it. + */ + gpa_width = out.rcx & GENMASK(5, 0); + *cc_mask = BIT_ULL(gpa_width - 1); + + /* + * The kernel can not handle #VE's when accessing normal kernel + * memory. Ensure that no #VE will be delivered for accesses to + * TD-private memory. Only VMM-shared memory (MMIO) will #VE. + */ + td_attr = out.rdx; + if (!(td_attr & ATTR_SEPT_VE_DISABLE)) + panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n"); +} + +/* + * The TDX module spec states that #VE may be injected for a limited set of + * reasons: + * + * - Emulation of the architectural #VE injection on EPT violation; + * + * - As a result of guest TD execution of a disallowed instruction, + * a disallowed MSR access, or CPUID virtualization; + * + * - A notification to the guest TD about anomalous behavior; + * + * The last one is opt-in and is not used by the kernel. + * + * The Intel Software Developer's Manual describes cases when instruction + * length field can be used in section "Information for VM Exits Due to + * Instruction Execution". + * + * For TDX, it ultimately means GET_VEINFO provides reliable instruction length + * information if #VE occurred due to instruction execution, but not for EPT + * violations. + */ +static int ve_instr_len(struct ve_info *ve) +{ + switch (ve->exit_reason) { + case EXIT_REASON_HLT: + case EXIT_REASON_MSR_READ: + case EXIT_REASON_MSR_WRITE: + case EXIT_REASON_CPUID: + case EXIT_REASON_IO_INSTRUCTION: + /* It is safe to use ve->instr_len for #VE due instructions */ + return ve->instr_len; + case EXIT_REASON_EPT_VIOLATION: + /* + * For EPT violations, ve->insn_len is not defined. For those, + * the kernel must decode instructions manually and should not + * be using this function. + */ + WARN_ONCE(1, "ve->instr_len is not defined for EPT violations"); + return 0; + default: + WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason); + return ve->instr_len; + } +} + +static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_HLT), + .r12 = irq_disabled, + }; + + /* + * Emulate HLT operation via hypercall. More info about ABI + * can be found in TDX Guest-Host-Communication Interface + * (GHCI), section 3.8 TDG.VP.VMCALL. + * + * The VMM uses the "IRQ disabled" param to understand IRQ + * enabled status (RFLAGS.IF) of the TD guest and to determine + * whether or not it should schedule the halted vCPU if an + * IRQ becomes pending. E.g. if IRQs are disabled, the VMM + * can keep the vCPU in virtual HLT, even if an IRQ is + * pending, without hanging/breaking the guest. + */ + return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0); +} + +static int handle_halt(struct ve_info *ve) +{ + /* + * Since non safe halt is mainly used in CPU offlining + * and the guest will always stay in the halt state, don't + * call the STI instruction (set do_sti as false). + */ + const bool irq_disabled = irqs_disabled(); + const bool do_sti = false; + + if (__halt(irq_disabled, do_sti)) + return -EIO; + + return ve_instr_len(ve); +} + +void __cpuidle tdx_safe_halt(void) +{ + /* + * For do_sti=true case, __tdx_hypercall() function enables + * interrupts using the STI instruction before the TDCALL. So + * set irq_disabled as false. + */ + const bool irq_disabled = false; + const bool do_sti = true; + + /* + * Use WARN_ONCE() to report the failure. + */ + if (__halt(irq_disabled, do_sti)) + WARN_ONCE(1, "HLT instruction emulation failed\n"); +} + +static int read_msr(struct pt_regs *regs, struct ve_info *ve) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_MSR_READ), + .r12 = regs->cx, + }; + + /* + * Emulate the MSR read via hypercall. More info about ABI + * can be found in TDX Guest-Host-Communication Interface + * (GHCI), section titled "TDG.VP.VMCALL". + */ + if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + return -EIO; + + regs->ax = lower_32_bits(args.r11); + regs->dx = upper_32_bits(args.r11); + return ve_instr_len(ve); +} + +static int write_msr(struct pt_regs *regs, struct ve_info *ve) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_MSR_WRITE), + .r12 = regs->cx, + .r13 = (u64)regs->dx << 32 | regs->ax, + }; + + /* + * Emulate the MSR write via hypercall. More info about ABI + * can be found in TDX Guest-Host-Communication Interface + * (GHCI) section titled "TDG.VP.VMCALL". + */ + if (__tdx_hypercall(&args, 0)) + return -EIO; + + return ve_instr_len(ve); +} + +static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_CPUID), + .r12 = regs->ax, + .r13 = regs->cx, + }; + + /* + * Only allow VMM to control range reserved for hypervisor + * communication. + * + * Return all-zeros for any CPUID outside the range. It matches CPU + * behaviour for non-supported leaf. + */ + if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) { + regs->ax = regs->bx = regs->cx = regs->dx = 0; + return ve_instr_len(ve); + } + + /* + * Emulate the CPUID instruction via a hypercall. More info about + * ABI can be found in TDX Guest-Host-Communication Interface + * (GHCI), section titled "VP.VMCALL". + */ + if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + return -EIO; + + /* + * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of + * EAX, EBX, ECX, EDX registers after the CPUID instruction execution. + * So copy the register contents back to pt_regs. + */ + regs->ax = args.r12; + regs->bx = args.r13; + regs->cx = args.r14; + regs->dx = args.r15; + + return ve_instr_len(ve); +} + +static bool mmio_read(int size, unsigned long addr, unsigned long *val) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION), + .r12 = size, + .r13 = EPT_READ, + .r14 = addr, + .r15 = *val, + }; + + if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + return false; + *val = args.r11; + return true; +} + +static bool mmio_write(int size, unsigned long addr, unsigned long val) +{ + return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size, + EPT_WRITE, addr, val); +} + +static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) +{ + unsigned long *reg, val, vaddr; + char buffer[MAX_INSN_SIZE]; + struct insn insn = {}; + enum mmio_type mmio; + int size, extend_size; + u8 extend_val = 0; + + /* Only in-kernel MMIO is supported */ + if (WARN_ON_ONCE(user_mode(regs))) + return -EFAULT; + + if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) + return -EFAULT; + + if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) + return -EINVAL; + + mmio = insn_decode_mmio(&insn, &size); + if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED)) + return -EINVAL; + + if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + reg = insn_get_modrm_reg_ptr(&insn, regs); + if (!reg) + return -EINVAL; + } + + /* + * Reject EPT violation #VEs that split pages. + * + * MMIO accesses are supposed to be naturally aligned and therefore + * never cross page boundaries. Seeing split page accesses indicates + * a bug or a load_unaligned_zeropad() that stepped into an MMIO page. + * + * load_unaligned_zeropad() will recover using exception fixups. + */ + vaddr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE) + return -EFAULT; + + /* Handle writes first */ + switch (mmio) { + case MMIO_WRITE: + memcpy(&val, reg, size); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; + case MMIO_WRITE_IMM: + val = insn.immediate.value; + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; + case MMIO_READ: + case MMIO_READ_ZERO_EXTEND: + case MMIO_READ_SIGN_EXTEND: + /* Reads are handled below */ + break; + case MMIO_MOVS: + case MMIO_DECODE_FAILED: + /* + * MMIO was accessed with an instruction that could not be + * decoded or handled properly. It was likely not using io.h + * helpers or accessed MMIO accidentally. + */ + return -EINVAL; + default: + WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); + return -EINVAL; + } + + /* Handle reads */ + if (!mmio_read(size, ve->gpa, &val)) + return -EIO; + + switch (mmio) { + case MMIO_READ: + /* Zero-extend for 32-bit operation */ + extend_size = size == 4 ? sizeof(*reg) : 0; + break; + case MMIO_READ_ZERO_EXTEND: + /* Zero extend based on operand size */ + extend_size = insn.opnd_bytes; + break; + case MMIO_READ_SIGN_EXTEND: + /* Sign extend based on operand size */ + extend_size = insn.opnd_bytes; + if (size == 1 && val & BIT(7)) + extend_val = 0xFF; + else if (size > 1 && val & BIT(15)) + extend_val = 0xFF; + break; + default: + /* All other cases has to be covered with the first switch() */ + WARN_ON_ONCE(1); + return -EINVAL; + } + + if (extend_size) + memset(reg, extend_val, extend_size); + memcpy(reg, &val, size); + return insn.length; +} + +static bool handle_in(struct pt_regs *regs, int size, int port) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), + .r12 = size, + .r13 = PORT_READ, + .r14 = port, + }; + u64 mask = GENMASK(BITS_PER_BYTE * size, 0); + bool success; + + /* + * Emulate the I/O read via hypercall. More info about ABI can be found + * in TDX Guest-Host-Communication Interface (GHCI) section titled + * "TDG.VP.VMCALL". + */ + success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT); + + /* Update part of the register affected by the emulated instruction */ + regs->ax &= ~mask; + if (success) + regs->ax |= args.r11 & mask; + + return success; +} + +static bool handle_out(struct pt_regs *regs, int size, int port) +{ + u64 mask = GENMASK(BITS_PER_BYTE * size, 0); + + /* + * Emulate the I/O write via hypercall. More info about ABI can be found + * in TDX Guest-Host-Communication Interface (GHCI) section titled + * "TDG.VP.VMCALL". + */ + return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size, + PORT_WRITE, port, regs->ax & mask); +} + +/* + * Emulate I/O using hypercall. + * + * Assumes the IO instruction was using ax, which is enforced + * by the standard io.h macros. + * + * Return True on success or False on failure. + */ +static int handle_io(struct pt_regs *regs, struct ve_info *ve) +{ + u32 exit_qual = ve->exit_qual; + int size, port; + bool in, ret; + + if (VE_IS_IO_STRING(exit_qual)) + return -EIO; + + in = VE_IS_IO_IN(exit_qual); + size = VE_GET_IO_SIZE(exit_qual); + port = VE_GET_PORT_NUM(exit_qual); + + + if (in) + ret = handle_in(regs, size, port); + else + ret = handle_out(regs, size, port); + if (!ret) + return -EIO; + + return ve_instr_len(ve); +} + +/* + * Early #VE exception handler. Only handles a subset of port I/O. + * Intended only for earlyprintk. If failed, return false. + */ +__init bool tdx_early_handle_ve(struct pt_regs *regs) +{ + struct ve_info ve; + int insn_len; + + tdx_get_ve_info(&ve); + + if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION) + return false; + + insn_len = handle_io(regs, &ve); + if (insn_len < 0) + return false; + + regs->ip += insn_len; + return true; +} + +void tdx_get_ve_info(struct ve_info *ve) +{ + struct tdx_module_output out; + + /* + * Called during #VE handling to retrieve the #VE info from the + * TDX module. + * + * This has to be called early in #VE handling. A "nested" #VE which + * occurs before this will raise a #DF and is not recoverable. + * + * The call retrieves the #VE info from the TDX module, which also + * clears the "#VE valid" flag. This must be done before anything else + * because any #VE that occurs while the valid flag is set will lead to + * #DF. + * + * Note, the TDX module treats virtual NMIs as inhibited if the #VE + * valid flag is set. It means that NMI=>#VE will not result in a #DF. + */ + tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out); + + /* Transfer the output parameters */ + ve->exit_reason = out.rcx; + ve->exit_qual = out.rdx; + ve->gla = out.r8; + ve->gpa = out.r9; + ve->instr_len = lower_32_bits(out.r10); + ve->instr_info = upper_32_bits(out.r10); +} + +/* + * Handle the user initiated #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve) +{ + switch (ve->exit_reason) { + case EXIT_REASON_CPUID: + return handle_cpuid(regs, ve); + default: + pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); + return -EIO; + } +} + +/* + * Handle the kernel #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve) +{ + switch (ve->exit_reason) { + case EXIT_REASON_HLT: + return handle_halt(ve); + case EXIT_REASON_MSR_READ: + return read_msr(regs, ve); + case EXIT_REASON_MSR_WRITE: + return write_msr(regs, ve); + case EXIT_REASON_CPUID: + return handle_cpuid(regs, ve); + case EXIT_REASON_EPT_VIOLATION: + return handle_mmio(regs, ve); + case EXIT_REASON_IO_INSTRUCTION: + return handle_io(regs, ve); + default: + pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); + return -EIO; + } +} + +bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve) +{ + int insn_len; + + if (user_mode(regs)) + insn_len = virt_exception_user(regs, ve); + else + insn_len = virt_exception_kernel(regs, ve); + if (insn_len < 0) + return false; + + /* After successful #VE handling, move the IP */ + regs->ip += insn_len; + + return true; +} + +static bool tdx_tlb_flush_required(bool private) +{ + /* + * TDX guest is responsible for flushing TLB on private->shared + * transition. VMM is responsible for flushing on shared->private. + * + * The VMM _can't_ flush private addresses as it can't generate PAs + * with the guest's HKID. Shared memory isn't subject to integrity + * checking, i.e. the VMM doesn't need to flush for its own protection. + * + * There's no need to flush when converting from shared to private, + * as flushing is the VMM's responsibility in this case, e.g. it must + * flush to avoid integrity failures in the face of a buggy or + * malicious guest. + */ + return !private; +} + +static bool tdx_cache_flush_required(void) +{ + /* + * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence. + * TDX doesn't have such capability. + * + * Flush cache unconditionally. + */ + return true; +} + +static bool try_accept_one(phys_addr_t *start, unsigned long len, + enum pg_level pg_level) +{ + unsigned long accept_size = page_level_size(pg_level); + u64 tdcall_rcx; + u8 page_size; + + if (!IS_ALIGNED(*start, accept_size)) + return false; + + if (len < accept_size) + return false; + + /* + * Pass the page physical address to the TDX module to accept the + * pending, private page. + * + * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G. + */ + switch (pg_level) { + case PG_LEVEL_4K: + page_size = 0; + break; + case PG_LEVEL_2M: + page_size = 1; + break; + case PG_LEVEL_1G: + page_size = 2; + break; + default: + return false; + } + + tdcall_rcx = *start | page_size; + if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) + return false; + + *start += accept_size; + return true; +} + +/* + * Inform the VMM of the guest's intent for this physical page: shared with + * the VMM or private to the guest. The VMM is expected to change its mapping + * of the page in response. + */ +static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) +{ + phys_addr_t start = __pa(vaddr); + phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE); + + if (!enc) { + /* Set the shared (decrypted) bits: */ + start |= cc_mkdec(0); + end |= cc_mkdec(0); + } + + /* + * Notify the VMM about page mapping conversion. More info about ABI + * can be found in TDX Guest-Host-Communication Interface (GHCI), + * section "TDG.VP.VMCALL" + */ + if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0)) + return false; + + /* private->shared conversion requires only MapGPA call */ + if (!enc) + return true; + + /* + * For shared->private conversion, accept the page using + * TDX_ACCEPT_PAGE TDX module call. + */ + while (start < end) { + unsigned long len = end - start; + + /* + * Try larger accepts first. It gives chance to VMM to keep + * 1G/2M SEPT entries where possible and speeds up process by + * cutting number of hypercalls (if successful). + */ + + if (try_accept_one(&start, len, PG_LEVEL_1G)) + continue; + + if (try_accept_one(&start, len, PG_LEVEL_2M)) + continue; + + if (!try_accept_one(&start, len, PG_LEVEL_4K)) + return false; + } + + return true; +} + +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). + */ + if (enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). + */ + if (!enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + +void __init tdx_early_init(void) +{ + u64 cc_mask; + u32 eax, sig[3]; + + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]); + + if (memcmp(TDX_IDENT, sig, sizeof(sig))) + return; + + setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); + + cc_set_vendor(CC_VENDOR_INTEL); + tdx_parse_tdinfo(&cc_mask); + cc_set_mask(cc_mask); + + /* + * All bits above GPA width are reserved and kernel treats shared bit + * as flag, not as part of physical address. + * + * Adjust physical mask to only cover valid GPA bits. + */ + physical_mask &= cc_mask - 1; + + /* + * The kernel mapping should match the TDX metadata for the page. + * load_unaligned_zeropad() can touch memory *adjacent* to that which is + * owned by the caller and can catch even _momentary_ mismatches. Bad + * things happen on mismatch: + * + * - Private mapping => Shared Page == Guest shutdown + * - Shared mapping => Private Page == Recoverable #VE + * + * guest.enc_status_change_prepare() converts the page from + * shared=>private before the mapping becomes private. + * + * guest.enc_status_change_finish() converts the page from + * private=>shared after the mapping becomes private. + * + * In both cases there is a temporary shared mapping to a private page, + * which can result in a #VE. But, there is never a private mapping to + * a shared page. + */ + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish; + + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + + pr_info("Guest detected\n"); +} diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig new file mode 100644 index 000000000..3cf34912a --- /dev/null +++ b/arch/x86/configs/i386_defconfig @@ -0,0 +1,285 @@ +CONFIG_WERROR=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_USELIB=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_CGROUPS=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_MISC=y +CONFIG_CGROUP_DEBUG=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_PROFILING=y +CONFIG_SMP=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_NR_CPUS=8 +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_MICROCODE_AMD=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_1000=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +# CONFIG_RETHUNK is not set +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_PM_TRACE_RTC=y +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_BGRT=y +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y +CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_ADVANCED is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_NET_SCHED=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_CLS_ACT=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_RFKILL=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_MSI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEBUG_DEVRES=y +CONFIG_CONNECTOR=y +CONFIG_EFI_CAPSULE_LOADER=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_VIRTIO=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_MACINTOSH_DRIVERS=y +CONFIG_MAC_EMUMOUSEBTN=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_NVRAM=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP is not set +CONFIG_I2C_I801=y +CONFIG_WATCHDOG=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_DRM_I915=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_HIDRAW=y +CONFIG_HID_GYRATION=y +CONFIG_LOGITECH_FF=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_INPUT=y +CONFIG_EEEPC_LAPTOP=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_EARLY_PRINTK_DBGP=y +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_UNWINDER_FRAME_POINTER=y +# CONFIG_64BIT is not set diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config new file mode 100644 index 000000000..66c9e2aab --- /dev/null +++ b/arch/x86/configs/tiny.config @@ -0,0 +1,5 @@ +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +CONFIG_UNWINDER_GUESS=y +# CONFIG_UNWINDER_FRAME_POINTER is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig new file mode 100644 index 000000000..27759236f --- /dev/null +++ b/arch/x86/configs/x86_64_defconfig @@ -0,0 +1,279 @@ +CONFIG_WERROR=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_CGROUPS=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_MISC=y +CONFIG_CGROUP_DEBUG=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_PROFILING=y +CONFIG_SMP=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_MICROCODE_AMD=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_NUMA=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EFI_MIXED=y +CONFIG_HZ_1000=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_PM_TRACE_RTC=y +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_BGRT=y +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_IA32_EMULATION=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y +CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_ADVANCED is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_NET_SCHED=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_CLS_ACT=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_RFKILL=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_HOTPLUG_PCI=y +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEBUG_DEVRES=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_VIRTIO=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_MACINTOSH_DRIVERS=y +CONFIG_MAC_EMUMOUSEBTN=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +CONFIG_R8169=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +CONFIG_NVRAM=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP is not set +CONFIG_I2C_I801=y +CONFIG_WATCHDOG=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_DRM_I915=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_HIDRAW=y +CONFIG_HID_GYRATION=y +CONFIG_LOGITECH_FF=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_INPUT=y +CONFIG_EEEPC_LAPTOP=y +CONFIG_AMD_IOMMU=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_EARLY_PRINTK_DBGP=y +CONFIG_DEBUG_BOOT_PARAMS=y diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config new file mode 100644 index 000000000..581296255 --- /dev/null +++ b/arch/x86/configs/xen.config @@ -0,0 +1,27 @@ +# global x86 required specific stuff +# On 32-bit HIGHMEM4G is not allowed +CONFIG_HIGHMEM64G=y +CONFIG_64BIT=y + +# These enable us to allow some of the +# not so generic stuff below +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_X86_MCE=y +CONFIG_ACPI_PROCESSOR=y +CONFIG_CPU_FREQ=y + +# x86 xen specific config options +CONFIG_XEN_PVH=y +CONFIG_XEN_SAVE_RESTORE=y +# CONFIG_XEN_DEBUG_FS is not set +CONFIG_XEN_MCE_LOG=y +CONFIG_XEN_ACPI_PROCESSOR=m +# x86 specific backend drivers +CONFIG_XEN_PCIDEV_BACKEND=m +# x86 specific frontend drivers +CONFIG_XEN_PCIDEV_FRONTEND=m +# depends on MEMORY_HOTPLUG, arm64 doesn't enable this yet, +# move to generic config if it ever does. +CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore new file mode 100644 index 000000000..580c839bb --- /dev/null +++ b/arch/x86/crypto/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +poly1305-x86_64-cryptogams.S diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig new file mode 100644 index 000000000..71c4c473d --- /dev/null +++ b/arch/x86/crypto/Kconfig @@ -0,0 +1,484 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (x86)" + +config CRYPTO_CURVE25519_X86 + tristate "Public key crypto: Curve25519 (ADX)" + depends on X86 && 64BIT + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + help + Curve25519 algorithm + + Architecture: x86_64 using: + - ADX (large integer arithmetic) + +config CRYPTO_AES_NI_INTEL + tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTR, XTS, GCM (AES-NI)" + depends on X86 + select CRYPTO_AEAD + select CRYPTO_LIB_AES + select CRYPTO_ALGAPI + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + help + Block cipher: AES cipher algorithms + AEAD cipher: AES with GCM + Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTR, XTS + + Architecture: x86 (32-bit and 64-bit) using: + - AES-NI (AES new instructions) + +config CRYPTO_BLOWFISH_X86_64 + tristate "Ciphers: Blowfish, modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_BLOWFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Blowfish cipher algorithm + Length-preserving ciphers: Blowfish with ECB and CBC modes + + Architecture: x86_64 + +config CRYPTO_CAMELLIA_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + imply CRYPTO_CTR + help + Block cipher: Camellia cipher algorithms + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 + +config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAMELLIA_X86_64 + select CRYPTO_SIMD + imply CRYPTO_XTS + help + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + +config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 + tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 + help + Length-preserving ciphers: Camellia with ECB and CBC modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_CAST5_AVX_X86_64 + tristate "Ciphers: CAST5 with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAST5 + select CRYPTO_CAST_COMMON + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: CAST5 (CAST-128) cipher algorithm + (RFC2144) with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes 16 blocks in parallel. + +config CRYPTO_CAST6_AVX_X86_64 + tristate "Ciphers: CAST6 with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_CAST6 + select CRYPTO_CAST_COMMON + select CRYPTO_SIMD + imply CRYPTO_XTS + imply CRYPTO_CTR + help + Length-preserving ciphers: CAST6 (CAST-256) cipher algorithm + (RFC2612) with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_DES3_EDE_X86_64 + tristate "Ciphers: Triple DES EDE with modes: ECB, CBC" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_LIB_DES + imply CRYPTO_CTR + help + Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm + Length-preserving ciphers: Triple DES EDE with ECB and CBC modes + + Architecture: x86_64 + + Processes one or three blocks in parallel. + +config CRYPTO_SERPENT_SSE2_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (SSE2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + + Processes eight blocks in parallel. + +config CRYPTO_SERPENT_SSE2_586 + tristate "Ciphers: Serpent with modes: ECB, CBC (32-bit with SSE2)" + depends on X86 && !64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86 (32-bit) using: + - SSE2 (Streaming SIMD Extensions 2) + + Processes four blocks in parallel. + +config CRYPTO_SERPENT_AVX_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SERPENT + select CRYPTO_SIMD + imply CRYPTO_XTS + imply CRYPTO_CTR + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_SERPENT_AVX2_X86_64 + tristate "Ciphers: Serpent with modes: ECB, CBC (AVX2)" + depends on X86 && 64BIT + select CRYPTO_SERPENT_AVX_X86_64 + help + Length-preserving ciphers: Serpent cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX2 (Advanced Vector Extensions 2) + + Processes 16 blocks in parallel. + +config CRYPTO_SM4_AESNI_AVX_X86_64 + tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_SM4 + help + Length-preserving ciphers: SM4 cipher algorithms + (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + + Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_SM4_AESNI_AVX2_X86_64 + tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_SM4 + select CRYPTO_SM4_AESNI_AVX_X86_64 + help + Length-preserving ciphers: SM4 cipher algorithms + (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX2 (Advanced Vector Extensions 2) + + Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_TWOFISH_586 + tristate "Ciphers: Twofish (32-bit)" + depends on (X86 || UML_X86) && !64BIT + select CRYPTO_ALGAPI + select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Twofish cipher algorithm + + Architecture: x86 (32-bit) + +config CRYPTO_TWOFISH_X86_64 + tristate "Ciphers: Twofish" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR + help + Block cipher: Twofish cipher algorithm + + Architecture: x86_64 + +config CRYPTO_TWOFISH_X86_64_3WAY + tristate "Ciphers: Twofish with modes: ECB, CBC (3-way parallel)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + help + Length-preserving cipher: Twofish cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 + + Processes three blocks in parallel, better utilizing resources of + out-of-order CPUs. + +config CRYPTO_TWOFISH_AVX_X86_64 + tristate "Ciphers: Twofish with modes: ECB, CBC (AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + select CRYPTO_TWOFISH_X86_64_3WAY + imply CRYPTO_XTS + help + Length-preserving cipher: Twofish cipher algorithm + with ECB and CBC modes + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + Processes eight blocks in parallel. + +config CRYPTO_ARIA_AESNI_AVX_X86_64 + tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX/GFNI)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_ARIA + help + Length-preserving cipher: ARIA cipher algorithms + (RFC 5794) with ECB and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX (Advanced Vector Extensions) + - GFNI (Galois Field New Instructions) + + Processes 16 blocks in parallel. + +config CRYPTO_CHACHA20_X86_64 + tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX2 (Advanced Vector Extensions 2) + - AVX-512VL (Advanced Vector Extensions-512VL) + +config CRYPTO_AEGIS128_AESNI_SSE2 + tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)" + depends on X86 && 64BIT + select CRYPTO_AEAD + select CRYPTO_SIMD + help + AEGIS-128 AEAD algorithm + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - SSE2 (Streaming SIMD Extensions 2) + +config CRYPTO_NHPOLY1305_SSE2 + tristate "Hash functions: NHPoly1305 (SSE2)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function for Adiantum + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + +config CRYPTO_NHPOLY1305_AVX2 + tristate "Hash functions: NHPoly1305 (AVX2)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + NHPoly1305 hash function for Adiantum + + Architecture: x86_64 using: + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_BLAKE2S_X86 + bool "Hash functions: BLAKE2s (SSSE3/AVX-512)" + depends on X86 && 64BIT + select CRYPTO_LIB_BLAKE2S_GENERIC + select CRYPTO_ARCH_HAVE_LIB_BLAKE2S + help + BLAKE2s cryptographic hash function (RFC 7693) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX-512 (Advanced Vector Extensions-512) + +config CRYPTO_POLYVAL_CLMUL_NI + tristate "Hash functions: POLYVAL (CLMUL-NI)" + depends on X86 && 64BIT + select CRYPTO_POLYVAL + help + POLYVAL hash function for HCTR2 + + Architecture: x86_64 using: + - CLMUL-NI (carry-less multiplication new instructions) + +config CRYPTO_POLY1305_X86_64 + tristate "Hash functions: Poly1305 (SSE2/AVX2)" + depends on X86 && 64BIT + select CRYPTO_LIB_POLY1305_GENERIC + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: x86_64 using: + - SSE2 (Streaming SIMD Extensions 2) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_SHA1_SSSE3 + tristate "Hash functions: SHA-1 (SSSE3/AVX/AVX2/SHA-NI)" + depends on X86 && 64BIT + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash algorithm (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + - SHA-NI (SHA Extensions New Instructions) + +config CRYPTO_SHA256_SSSE3 + tristate "Hash functions: SHA-224 and SHA-256 (SSSE3/AVX/AVX2/SHA-NI)" + depends on X86 && 64BIT + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-224 and SHA-256 secure hash algorithms (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + - SHA-NI (SHA Extensions New Instructions) + +config CRYPTO_SHA512_SSSE3 + tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-384 and SHA-512 secure hash algorithms (FIPS 180) + + Architecture: x86_64 using: + - SSSE3 (Supplemental SSE3) + - AVX (Advanced Vector Extensions) + - AVX2 (Advanced Vector Extensions 2) + +config CRYPTO_SM3_AVX_X86_64 + tristate "Hash functions: SM3 (AVX)" + depends on X86 && 64BIT + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3 + + Architecture: x86_64 using: + - AVX (Advanced Vector Extensions) + + If unsure, say N. + +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "Hash functions: GHASH (CLMUL-NI)" + depends on X86 && 64BIT + select CRYPTO_CRYPTD + help + GCM GHASH hash function (NIST SP800-38D) + + Architecture: x86_64 using: + - CLMUL-NI (carry-less multiplication new instructions) + +config CRYPTO_CRC32C_INTEL + tristate "CRC32c (SSE4.2/PCLMULQDQ)" + depends on X86 + select CRYPTO_HASH + help + CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) + + Architecture: x86 (32-bit and 64-bit) using: + - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction + - PCLMULQDQ (carry-less multiplication) + +config CRYPTO_CRC32_PCLMUL + tristate "CRC32 (PCLMULQDQ)" + depends on X86 + select CRYPTO_HASH + select CRC32 + help + CRC32 CRC algorithm (IEEE 802.3) + + Architecture: x86 (32-bit and 64-bit) using: + - PCLMULQDQ (carry-less multiplication) + +config CRYPTO_CRCT10DIF_PCLMUL + tristate "CRCT10DIF (PCLMULQDQ)" + depends on X86 && 64BIT && CRC_T10DIF + select CRYPTO_HASH + help + CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) + + Architecture: x86_64 using: + - PCLMULQDQ (carry-less multiplication) + +endmenu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 000000000..3b1d701a4 --- /dev/null +++ b/arch/x86/crypto/Makefile @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# x86 crypto algorithms + +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o +twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o +twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o +obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o +twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o + +obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o +serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o +serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o +serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o +serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o + +obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o +des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o + +obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o +camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o camellia_aesni_avx_glue.o +obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o +camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o + +obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o + +obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o +cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o + +obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o +cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o + +obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o +aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o + +obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o +chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o +chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o + +obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o + +obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o +sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o + +obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o +sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o +sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o + +obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o +sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o + +obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o +libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o + +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o + +obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o +polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o + +obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o +crc32c-intel-y := crc32c-intel_glue.o +crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o + +obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o +crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o + +obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o +crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o + +obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o +poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o +targets += poly1305-x86_64-cryptogams.S + +obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o +nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o +nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o + +obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o + +obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o +sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o +sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o +sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o + +obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o +aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $< > $@ +$(obj)/%.S: $(src)/%.pl FORCE + $(call if_changed,perlasm) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S new file mode 100644 index 000000000..cdf3215ec --- /dev/null +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -0,0 +1,748 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AES-NI + SSE2 implementation of AEGIS-128 + * + * Copyright (c) 2017-2018 Ondrej Mosnacek + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + */ + +#include +#include +#include + +#define STATE0 %xmm0 +#define STATE1 %xmm1 +#define STATE2 %xmm2 +#define STATE3 %xmm3 +#define STATE4 %xmm4 +#define KEY %xmm5 +#define MSG %xmm5 +#define T0 %xmm6 +#define T1 %xmm7 + +#define STATEP %rdi +#define LEN %rsi +#define SRC %rdx +#define DST %rcx + +.section .rodata.cst16.aegis128_const, "aM", @progbits, 32 +.align 16 +.Laegis128_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Laegis128_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 +.align 16 +.Laegis128_counter: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + +.text + +/* + * aegis128_update + * input: + * STATE[0-4] - input state + * output: + * STATE[0-4] - output state (shifted positions) + * changed: + * T0 + */ +.macro aegis128_update + movdqa STATE4, T0 + aesenc STATE0, STATE4 + aesenc STATE1, STATE0 + aesenc STATE2, STATE1 + aesenc STATE3, STATE2 + aesenc T0, STATE3 +.endm + +/* + * __load_partial: internal ABI + * input: + * LEN - bytes + * SRC - src + * output: + * MSG - message block + * changed: + * T0 + * %r8 + * %r9 + */ +SYM_FUNC_START_LOCAL(__load_partial) + xor %r9d, %r9d + pxor MSG, MSG + + mov LEN, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov LEN, %r8 + and $0x1E, %r8 + add SRC, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov LEN, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov LEN, %r8 + and $0x1C, %r8 + add SRC, %r8 + shl $0x10, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov LEN, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov LEN, %r8 + and $0x18, %r8 + add SRC, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG + + mov LEN, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov LEN, %r8 + and $0x10, %r8 + add SRC, %r8 + pslldq $8, MSG + movq (%r8), T0 + pxor T0, MSG + +.Lld_partial_8: + RET +SYM_FUNC_END(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * LEN - bytes + * DST - dst + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +SYM_FUNC_START_LOCAL(__store_partial) + mov LEN, %r8 + mov DST, %r9 + + movq T0, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0 + movq T0, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $0x10, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + RET +SYM_FUNC_END(__store_partial) + +/* + * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv); + */ +SYM_FUNC_START(crypto_aegis128_aesni_init) + FRAME_BEGIN + + /* load IV: */ + movdqu (%rdx), T1 + + /* load key: */ + movdqa (%rsi), KEY + pxor KEY, T1 + movdqa T1, STATE0 + movdqa KEY, STATE3 + movdqa KEY, STATE4 + + /* load the constants: */ + movdqa .Laegis128_const_0, STATE2 + movdqa .Laegis128_const_1, STATE1 + pxor STATE2, STATE3 + pxor STATE1, STATE4 + + /* update 10 times with KEY / KEY xor IV: */ + aegis128_update; pxor KEY, STATE4 + aegis128_update; pxor T1, STATE3 + aegis128_update; pxor KEY, STATE2 + aegis128_update; pxor T1, STATE1 + aegis128_update; pxor KEY, STATE0 + aegis128_update; pxor T1, STATE4 + aegis128_update; pxor KEY, STATE3 + aegis128_update; pxor T1, STATE2 + aegis128_update; pxor KEY, STATE1 + aegis128_update; pxor T1, STATE0 + + /* store the state: */ + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_init) + +/* + * void crypto_aegis128_aesni_ad(void *state, unsigned int length, + * const void *data); + */ +SYM_FUNC_START(crypto_aegis128_aesni_ad) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lad_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + and $0xF, %r8 + jnz .Lad_u_loop + +.align 8 +.Lad_a_loop: + movdqa 0x00(SRC), MSG + aegis128_update + pxor MSG, STATE4 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_1 + + movdqa 0x10(SRC), MSG + aegis128_update + pxor MSG, STATE3 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_2 + + movdqa 0x20(SRC), MSG + aegis128_update + pxor MSG, STATE2 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_3 + + movdqa 0x30(SRC), MSG + aegis128_update + pxor MSG, STATE1 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_4 + + movdqa 0x40(SRC), MSG + aegis128_update + pxor MSG, STATE0 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_0 + + add $0x50, SRC + jmp .Lad_a_loop + +.align 8 +.Lad_u_loop: + movdqu 0x00(SRC), MSG + aegis128_update + pxor MSG, STATE4 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_1 + + movdqu 0x10(SRC), MSG + aegis128_update + pxor MSG, STATE3 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_2 + + movdqu 0x20(SRC), MSG + aegis128_update + pxor MSG, STATE2 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_3 + + movdqu 0x30(SRC), MSG + aegis128_update + pxor MSG, STATE1 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_4 + + movdqu 0x40(SRC), MSG + aegis128_update + pxor MSG, STATE0 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_0 + + add $0x50, SRC + jmp .Lad_u_loop + + /* store the state: */ +.Lad_out_0: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + RET + +.Lad_out_1: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + RET + +.Lad_out_2: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + RET + +.Lad_out_3: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + RET + +.Lad_out_4: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + RET + +.Lad_out: + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_ad) + +.macro encrypt_block a s0 s1 s2 s3 s4 i + movdq\a (\i * 0x10)(SRC), MSG + movdqa MSG, T0 + pxor \s1, T0 + pxor \s4, T0 + movdqa \s2, T1 + pand \s3, T1 + pxor T1, T0 + movdq\a T0, (\i * 0x10)(DST) + + aegis128_update + pxor MSG, \s4 + + sub $0x10, LEN + cmp $0x10, LEN + jl .Lenc_out_\i +.endm + +/* + * void crypto_aegis128_aesni_enc(void *state, unsigned int length, + * const void *src, void *dst); + */ +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lenc_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Lenc_u_loop + +.align 8 +.Lenc_a_loop: + encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Lenc_a_loop + +.align 8 +.Lenc_u_loop: + encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Lenc_u_loop + + /* store the state: */ +.Lenc_out_0: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + RET + +.Lenc_out_1: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + RET + +.Lenc_out_2: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + RET + +.Lenc_out_3: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + RET + +.Lenc_out_4: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + RET + +.Lenc_out: + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_enc) + +/* + * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* encrypt message: */ + call __load_partial + + movdqa MSG, T0 + pxor STATE1, T0 + pxor STATE4, T0 + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, T0 + + call __store_partial + + aegis128_update + pxor MSG, STATE4 + + /* store the state: */ + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) + +.macro decrypt_block a s0 s1 s2 s3 s4 i + movdq\a (\i * 0x10)(SRC), MSG + pxor \s1, MSG + pxor \s4, MSG + movdqa \s2, T1 + pand \s3, T1 + pxor T1, MSG + movdq\a MSG, (\i * 0x10)(DST) + + aegis128_update + pxor MSG, \s4 + + sub $0x10, LEN + cmp $0x10, LEN + jl .Ldec_out_\i +.endm + +/* + * void crypto_aegis128_aesni_dec(void *state, unsigned int length, + * const void *src, void *dst); + */ +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Ldec_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 8 +.Ldec_a_loop: + decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Ldec_a_loop + +.align 8 +.Ldec_u_loop: + decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Ldec_u_loop + + /* store the state: */ +.Ldec_out_0: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + RET + +.Ldec_out_1: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + RET + +.Ldec_out_2: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + RET + +.Ldec_out_3: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + RET + +.Ldec_out_4: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + RET + +.Ldec_out: + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_dec) + +/* + * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* decrypt message: */ + call __load_partial + + pxor STATE1, MSG + pxor STATE4, MSG + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, MSG + + movdqa MSG, T0 + call __store_partial + + /* mask with byte count: */ + movq LEN, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + movdqa .Laegis128_counter, T1 + pcmpgtb T1, T0 + pand T0, MSG + + aegis128_update + pxor MSG, STATE4 + + /* store the state: */ + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) + +/* + * void crypto_aegis128_aesni_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +SYM_FUNC_START(crypto_aegis128_aesni_final) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* prepare length block: */ + movq %rdx, MSG + movq %rcx, T0 + pslldq $8, T0 + pxor T0, MSG + psllq $3, MSG /* multiply by 8 (to get bit count) */ + + pxor STATE3, MSG + + /* update state: */ + aegis128_update; pxor MSG, STATE4 + aegis128_update; pxor MSG, STATE3 + aegis128_update; pxor MSG, STATE2 + aegis128_update; pxor MSG, STATE1 + aegis128_update; pxor MSG, STATE0 + aegis128_update; pxor MSG, STATE4 + aegis128_update; pxor MSG, STATE3 + + /* xor tag: */ + movdqu (%rsi), MSG + + pxor STATE0, MSG + pxor STATE1, MSG + pxor STATE2, MSG + pxor STATE3, MSG + pxor STATE4, MSG + + movdqu MSG, (%rsi) + + FRAME_END + RET +SYM_FUNC_END(crypto_aegis128_aesni_final) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c new file mode 100644 index 000000000..462318900 --- /dev/null +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The AEGIS-128 Authenticated-Encryption Algorithm + * Glue for AES-NI + SSE2 implementation + * + * Copyright (c) 2017-2018 Ondrej Mosnacek + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define AEGIS128_BLOCK_ALIGN 16 +#define AEGIS128_BLOCK_SIZE 16 +#define AEGIS128_NONCE_SIZE 16 +#define AEGIS128_STATE_BLOCKS 5 +#define AEGIS128_KEY_SIZE 16 +#define AEGIS128_MIN_AUTH_SIZE 8 +#define AEGIS128_MAX_AUTH_SIZE 16 + +asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv); + +asmlinkage void crypto_aegis128_aesni_ad( + void *state, unsigned int length, const void *data); + +asmlinkage void crypto_aegis128_aesni_enc( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_dec( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_enc_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_dec_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_final( + void *state, void *tag_xor, unsigned int cryptlen, + unsigned int assoclen); + +struct aegis_block { + u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN); +}; + +struct aegis_state { + struct aegis_block blocks[AEGIS128_STATE_BLOCKS]; +}; + +struct aegis_ctx { + struct aegis_block key; +}; + +struct aegis_crypt_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, unsigned int length, const void *src, + void *dst); + void (*crypt_tail)(void *state, unsigned int length, const void *src, + void *dst); +}; + +static void crypto_aegis128_aesni_process_ad( + struct aegis_state *state, struct scatterlist *sg_src, + unsigned int assoclen) +{ + struct scatter_walk walk; + struct aegis_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= AEGIS128_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = AEGIS128_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + crypto_aegis128_aesni_ad(state, + AEGIS128_BLOCK_SIZE, + buf.bytes); + pos = 0; + left -= fill; + src += fill; + } + + crypto_aegis128_aesni_ad(state, left, src); + + src += left & ~(AEGIS128_BLOCK_SIZE - 1); + left &= AEGIS128_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + pos += left; + assoclen -= size; + + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); + crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); + } +} + +static void crypto_aegis128_aesni_process_crypt( + struct aegis_state *state, struct skcipher_walk *walk, + const struct aegis_crypt_ops *ops) +{ + while (walk->nbytes >= AEGIS128_BLOCK_SIZE) { + ops->crypt_blocks(state, + round_down(walk->nbytes, AEGIS128_BLOCK_SIZE), + walk->src.virt.addr, walk->dst.virt.addr); + skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); + } + + if (walk->nbytes) { + ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, + walk->dst.virt.addr); + skcipher_walk_done(walk, 0); + } +} + +static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead) +{ + u8 *ctx = crypto_aead_ctx(aead); + ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); + return (void *)ctx; +} + +static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead); + + if (keylen != AEGIS128_KEY_SIZE) + return -EINVAL; + + memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE); + + return 0; +} + +static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + if (authsize > AEGIS128_MAX_AUTH_SIZE) + return -EINVAL; + if (authsize < AEGIS128_MIN_AUTH_SIZE) + return -EINVAL; + return 0; +} + +static void crypto_aegis128_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, + const struct aegis_crypt_ops *ops) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); + struct skcipher_walk walk; + struct aegis_state state; + + ops->skcipher_walk_init(&walk, req, true); + + kernel_fpu_begin(); + + crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); + crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); + crypto_aegis128_aesni_process_crypt(&state, &walk, ops); + crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +static int crypto_aegis128_aesni_encrypt(struct aead_request *req) +{ + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = crypto_aegis128_aesni_enc, + .crypt_tail = crypto_aegis128_aesni_enc_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} + +static int crypto_aegis128_aesni_decrypt(struct aead_request *req) +{ + static const struct aegis_block zeros = {}; + + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = crypto_aegis128_aesni_dec, + .crypt_tail = crypto_aegis128_aesni_dec_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + + return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; +} + +static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead) +{ + return 0; +} + +static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) +{ +} + +static struct aead_alg crypto_aegis128_aesni_alg = { + .setkey = crypto_aegis128_aesni_setkey, + .setauthsize = crypto_aegis128_aesni_setauthsize, + .encrypt = crypto_aegis128_aesni_encrypt, + .decrypt = crypto_aegis128_aesni_decrypt, + .init = crypto_aegis128_aesni_init_tfm, + .exit = crypto_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128", + .cra_driver_name = "__aegis128-aesni", + + .cra_module = THIS_MODULE, + } +}; + +static struct simd_aead_alg *simd_alg; + +static int __init crypto_aegis128_aesni_module_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_XMM2) || + !boot_cpu_has(X86_FEATURE_AES) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) + return -ENODEV; + + return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, + &simd_alg); +} + +static void __exit crypto_aegis128_aesni_module_exit(void) +{ + simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg); +} + +module_init(crypto_aegis128_aesni_module_init); +module_exit(crypto_aegis128_aesni_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek "); +MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation"); +MODULE_ALIAS_CRYPTO("aegis128"); +MODULE_ALIAS_CRYPTO("aegis128-aesni"); diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S new file mode 100644 index 000000000..2402b9418 --- /dev/null +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -0,0 +1,597 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ +/* + * AES CTR mode by8 optimization with AVX instructions. (x86_64) + * + * Copyright(c) 2014 Intel Corporation. + * + * Contact Information: + * James Guilford + * Sean Gulley + * Chandramouli Narayanan + */ +/* + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. + * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptographic library. + * Additional information on it can be found at: + * https://github.com/intel/intel-ipsec-mb + */ + +#include + +#define VMOVDQ vmovdqu + +/* + * Note: the "x" prefix in these aliases means "this is an xmm register". The + * alias prefixes have no relation to XCTR where the "X" prefix means "XOR + * counter". + */ +#define xdata0 %xmm0 +#define xdata1 %xmm1 +#define xdata2 %xmm2 +#define xdata3 %xmm3 +#define xdata4 %xmm4 +#define xdata5 %xmm5 +#define xdata6 %xmm6 +#define xdata7 %xmm7 +#define xcounter %xmm8 // CTR mode only +#define xiv %xmm8 // XCTR mode only +#define xbyteswap %xmm9 // CTR mode only +#define xtmp %xmm9 // XCTR mode only +#define xkey0 %xmm10 +#define xkey4 %xmm11 +#define xkey8 %xmm12 +#define xkey12 %xmm13 +#define xkeyA %xmm14 +#define xkeyB %xmm15 + +#define p_in %rdi +#define p_iv %rsi +#define p_keys %rdx +#define p_out %rcx +#define num_bytes %r8 +#define counter %r9 // XCTR mode only +#define tmp %r10 +#define DDQ_DATA 0 +#define XDATA 1 +#define KEY_128 1 +#define KEY_192 2 +#define KEY_256 3 + +.section .rodata +.align 16 + +byteswap_const: + .octa 0x000102030405060708090A0B0C0D0E0F +ddq_low_msk: + .octa 0x0000000000000000FFFFFFFFFFFFFFFF +ddq_high_add_1: + .octa 0x00000000000000010000000000000000 +ddq_add_1: + .octa 0x00000000000000000000000000000001 +ddq_add_2: + .octa 0x00000000000000000000000000000002 +ddq_add_3: + .octa 0x00000000000000000000000000000003 +ddq_add_4: + .octa 0x00000000000000000000000000000004 +ddq_add_5: + .octa 0x00000000000000000000000000000005 +ddq_add_6: + .octa 0x00000000000000000000000000000006 +ddq_add_7: + .octa 0x00000000000000000000000000000007 +ddq_add_8: + .octa 0x00000000000000000000000000000008 + +.text + +/* generate a unique variable for ddq_add_x */ + +/* generate a unique variable for xmm register */ +.macro setxdata n + var_xdata = %xmm\n +.endm + +/* club the numeric 'id' to the symbol 'name' */ + +.macro club name, id +.altmacro + .if \name == XDATA + setxdata %\id + .endif +.noaltmacro +.endm + +/* + * do_aes num_in_par load_keys key_len + * This increments p_in, but not p_out + */ +.macro do_aes b, k, key_len, xctr + .set by, \b + .set load_keys, \k + .set klen, \key_len + + .if (load_keys) + vmovdqa 0*16(p_keys), xkey0 + .endif + + .if \xctr + movq counter, xtmp + .set i, 0 + .rept (by) + club XDATA, i + vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata + .set i, (i +1) + .endr + .set i, 0 + .rept (by) + club XDATA, i + vpxor xiv, var_xdata, var_xdata + .set i, (i +1) + .endr + .else + vpshufb xbyteswap, xcounter, xdata0 + .set i, 1 + .rept (by - 1) + club XDATA, i + vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata + vptest ddq_low_msk(%rip), var_xdata + jnz 1f + vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: + vpshufb xbyteswap, var_xdata, var_xdata + .set i, (i +1) + .endr + .endif + + vmovdqa 1*16(p_keys), xkeyA + + vpxor xkey0, xdata0, xdata0 + .if \xctr + add $by, counter + .else + vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter + vptest ddq_low_msk(%rip), xcounter + jnz 1f + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: + .endif + + .set i, 1 + .rept (by - 1) + club XDATA, i + vpxor xkey0, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 2*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 3*16(p_keys), xkey4 + .endif + .else + vmovdqa 3*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ + .set i, (i +1) + .endr + + add $(16*by), p_in + + .if (klen == KEY_128) + vmovdqa 4*16(p_keys), xkeyB + .else + .if (load_keys) + vmovdqa 4*16(p_keys), xkey4 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 3 */ + .if (klen == KEY_128) + vaesenc xkey4, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + vmovdqa 5*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + /* key 4 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey4, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 6*16(p_keys), xkey8 + .endif + .else + vmovdqa 6*16(p_keys), xkeyB + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ + .set i, (i +1) + .endr + + vmovdqa 7*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + /* key 6 */ + .if (klen == KEY_128) + vaesenc xkey8, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_128) + vmovdqa 8*16(p_keys), xkeyB + .else + .if (load_keys) + vmovdqa 8*16(p_keys), xkey8 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 9*16(p_keys), xkey12 + .endif + .else + vmovdqa 9*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 8 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey8, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + vmovdqa 10*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 9 */ + .if (klen == KEY_128) + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + vmovdqa 11*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 10 */ + .if (klen == KEY_128) + vaesenclast xkeyB, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + .if (load_keys) + vmovdqa 12*16(p_keys), xkey12 + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 13*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + .if (klen == KEY_256) + /* key 12 */ + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenclast xkey12, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 14*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 13 */ + vaesenc xkeyA, var_xdata, var_xdata + .set i, (i +1) + .endr + + .set i, 0 + .rept by + club XDATA, i + /* key 14 */ + vaesenclast xkeyB, var_xdata, var_xdata + .set i, (i +1) + .endr + .endif + .endif + + .set i, 0 + .rept (by / 2) + .set j, (i+1) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + VMOVDQ (j*16 - 16*by)(p_in), xkeyB + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + club XDATA, j + vpxor xkeyB, var_xdata, var_xdata + .set i, (i+2) + .endr + + .if (i < by) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + .endif + + .set i, 0 + .rept by + club XDATA, i + VMOVDQ var_xdata, i*16(p_out) + .set i, (i+1) + .endr +.endm + +.macro do_aes_load val, key_len, xctr + do_aes \val, 1, \key_len, \xctr +.endm + +.macro do_aes_noload val, key_len, xctr + do_aes \val, 0, \key_len, \xctr +.endm + +/* main body of aes ctr load */ + +.macro do_aes_ctrmain key_len, xctr + cmp $16, num_bytes + jb .Ldo_return2\xctr\key_len + + .if \xctr + shr $4, counter + vmovdqu (p_iv), xiv + .else + vmovdqa byteswap_const(%rip), xbyteswap + vmovdqu (p_iv), xcounter + vpshufb xbyteswap, xcounter, xcounter + .endif + + mov num_bytes, tmp + and $(7*16), tmp + jz .Lmult_of_8_blks\xctr\key_len + + /* 1 <= tmp <= 7 */ + cmp $(4*16), tmp + jg .Lgt4\xctr\key_len + je .Leq4\xctr\key_len + +.Llt4\xctr\key_len: + cmp $(2*16), tmp + jg .Leq3\xctr\key_len + je .Leq2\xctr\key_len + +.Leq1\xctr\key_len: + do_aes_load 1, \key_len, \xctr + add $(1*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Leq2\xctr\key_len: + do_aes_load 2, \key_len, \xctr + add $(2*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + + +.Leq3\xctr\key_len: + do_aes_load 3, \key_len, \xctr + add $(3*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Leq4\xctr\key_len: + do_aes_load 4, \key_len, \xctr + add $(4*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Lgt4\xctr\key_len: + cmp $(6*16), tmp + jg .Leq7\xctr\key_len + je .Leq6\xctr\key_len + +.Leq5\xctr\key_len: + do_aes_load 5, \key_len, \xctr + add $(5*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Leq6\xctr\key_len: + do_aes_load 6, \key_len, \xctr + add $(6*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Leq7\xctr\key_len: + do_aes_load 7, \key_len, \xctr + add $(7*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\xctr\key_len + jmp .Lmain_loop2\xctr\key_len + +.Lmult_of_8_blks\xctr\key_len: + .if (\key_len != KEY_128) + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 4*16(p_keys), xkey4 + vmovdqa 8*16(p_keys), xkey8 + vmovdqa 12*16(p_keys), xkey12 + .else + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 3*16(p_keys), xkey4 + vmovdqa 6*16(p_keys), xkey8 + vmovdqa 9*16(p_keys), xkey12 + .endif +.align 16 +.Lmain_loop2\xctr\key_len: + /* num_bytes is a multiple of 8 and >0 */ + do_aes_noload 8, \key_len, \xctr + add $(8*16), p_out + sub $(8*16), num_bytes + jne .Lmain_loop2\xctr\key_len + +.Ldo_return2\xctr\key_len: + .if !\xctr + /* return updated IV */ + vpshufb xbyteswap, xcounter, xcounter + vmovdqu xcounter, (p_iv) + .endif + RET +.endm + +/* + * routine to do AES128 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +SYM_FUNC_START(aes_ctr_enc_128_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_128 0 + +SYM_FUNC_END(aes_ctr_enc_128_avx_by8) + +/* + * routine to do AES192 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +SYM_FUNC_START(aes_ctr_enc_192_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_192 0 + +SYM_FUNC_END(aes_ctr_enc_192_avx_by8) + +/* + * routine to do AES256 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +SYM_FUNC_START(aes_ctr_enc_256_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_256 0 + +SYM_FUNC_END(aes_ctr_enc_256_avx_by8) + +/* + * routine to do AES128 XCTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys, + * u8* out, unsigned int num_bytes, unsigned int byte_ctr) + */ +SYM_FUNC_START(aes_xctr_enc_128_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_128 1 + +SYM_FUNC_END(aes_xctr_enc_128_avx_by8) + +/* + * routine to do AES192 XCTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys, + * u8* out, unsigned int num_bytes, unsigned int byte_ctr) + */ +SYM_FUNC_START(aes_xctr_enc_192_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_192 1 + +SYM_FUNC_END(aes_xctr_enc_192_avx_by8) + +/* + * routine to do AES256 XCTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys, + * u8* out, unsigned int num_bytes, unsigned int byte_ctr) + */ +SYM_FUNC_START(aes_xctr_enc_256_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_256 1 + +SYM_FUNC_END(aes_xctr_enc_256_avx_by8) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S new file mode 100644 index 000000000..837c1e0aa --- /dev/null +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -0,0 +1,3161 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Implement AES algorithm in Intel AES-NI instructions. + * + * The white paper of AES-NI instructions can be downloaded from: + * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Kahraman Akdemir + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban + * James Guilford (james.guilford@intel.com) + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Wajdi Feghali (wajdi.k.feghali@intel.com) + * Copyright (c) 2010, Intel Corporation. + * + * Ported x86_64 version to x86: + * Author: Mathias Krause + */ + +#include +#include +#include + +/* + * The following macros are used to move an (un)aligned 16 byte value to/from + * an XMM register. This can done for either FP or integer values, for FP use + * movaps (move aligned packed single) or integer use movdqa (move double quad + * aligned). It doesn't make a performance difference which instruction is used + * since Nehalem (original Core i7) was released. However, the movaps is a byte + * shorter, so that is the one we'll use for now. (same for unaligned). + */ +#define MOVADQ movaps +#define MOVUDQ movups + +#ifdef __x86_64__ + +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 +POLY: .octa 0xC2000000000000000000000000000001 +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 +TWOONE: .octa 0x00000001000000000000000000000001 + +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 +SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst16.MASK1, "aM", @progbits, 16 +.align 16 +MASK1: .octa 0x0000000000000000ffffffffffffffff +.section .rodata.cst16.MASK2, "aM", @progbits, 16 +.align 16 +MASK2: .octa 0xffffffffffffffff0000000000000000 +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 +ONE: .octa 0x00000000000000000000000000000001 +.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 +.align 16 +F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +.section .rodata.cst16.dec, "aM", @progbits, 16 +.align 16 +dec: .octa 0x1 +.section .rodata.cst16.enc, "aM", @progbits, 16 +.align 16 +enc: .octa 0x2 + +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + +.text + + +#define STACK_OFFSET 8*3 + +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 +#define HashKey 16*6 // store HashKey <<1 mod poly here +#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here +#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 + // bits of HashKey <<1 mod poly here + //(for Karatsuba purposes) +#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 + // bits of HashKey^2 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 + // bits of HashKey^3 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 + // bits of HashKey^4 <<1 mod poly here + // (for Karatsuba purposes) + +#define arg1 rdi +#define arg2 rsi +#define arg3 rdx +#define arg4 rcx +#define arg5 r8 +#define arg6 r9 +#define arg7 STACK_OFFSET+8(%rsp) +#define arg8 STACK_OFFSET+16(%rsp) +#define arg9 STACK_OFFSET+24(%rsp) +#define arg10 STACK_OFFSET+32(%rsp) +#define arg11 STACK_OFFSET+40(%rsp) +#define keysize 2*15*16(%arg1) +#endif + + +#define STATE1 %xmm0 +#define STATE2 %xmm4 +#define STATE3 %xmm5 +#define STATE4 %xmm6 +#define STATE STATE1 +#define IN1 %xmm1 +#define IN2 %xmm7 +#define IN3 %xmm8 +#define IN4 %xmm9 +#define IN IN1 +#define KEY %xmm2 +#define IV %xmm3 + +#define BSWAP_MASK %xmm10 +#define CTR %xmm11 +#define INC %xmm12 + +#define GF128MUL_MASK %xmm7 + +#ifdef __x86_64__ +#define AREG %rax +#define KEYP %rdi +#define OUTP %rsi +#define UKEYP OUTP +#define INP %rdx +#define LEN %rcx +#define IVP %r8 +#define KLEN %r9d +#define T1 %r10 +#define TKEYP T1 +#define T2 %r11 +#define TCTR_LOW T2 +#else +#define AREG %eax +#define KEYP %edi +#define OUTP AREG +#define UKEYP OUTP +#define INP %edx +#define LEN %esi +#define IVP %ebp +#define KLEN %ebx +#define T1 %ecx +#define TKEYP T1 +#endif + +.macro FUNC_SAVE + push %r12 + push %r13 + push %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# +.endm + + +.macro FUNC_RESTORE + pop %r14 + pop %r13 + pop %r12 +.endm + +# Precompute hashkeys. +# Input: Hash subkey. +# Output: HashKeys stored in gcm_context_data. Only needs to be called +# once per key. +# clobbers r12, and tmp xmm registers. +.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 + mov \SUBKEY, %r12 + movdqu (%r12), \TMP3 + movdqa SHUF_MASK(%rip), \TMP2 + pshufb \TMP2, \TMP3 + + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa \TMP3, \TMP2 + psllq $1, \TMP3 + psrlq $63, \TMP2 + movdqa \TMP2, \TMP1 + pslldq $8, \TMP2 + psrldq $8, \TMP1 + por \TMP2, \TMP3 + + # reduce HashKey<<1 + + pshufd $0x24, \TMP1, \TMP2 + pcmpeqd TWOONE(%rip), \TMP2 + pand POLY(%rip), \TMP2 + pxor \TMP2, \TMP3 + movdqu \TMP3, HashKey(%arg2) + + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqu \TMP1, HashKey_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqu \TMP5, HashKey_2(%arg2) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqu \TMP1, HashKey_2_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqu \TMP5, HashKey_3(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqu \TMP1, HashKey_3_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqu \TMP5, HashKey_4(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqu \TMP1, HashKey_4_k(%arg2) +.endm + +# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. +# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 +.macro GCM_INIT Iv SUBKEY AAD AADLEN + mov \AADLEN, %r11 + mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length + xor %r11d, %r11d + mov %r11, InLen(%arg2) # ctx_data.in_length = 0 + mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 + mov \Iv, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv + + movdqa SHUF_MASK(%rip), %xmm2 + pshufb %xmm2, %xmm0 + movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv + + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 + movdqu HashKey(%arg2), %xmm13 + + CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ + %xmm4, %xmm5, %xmm6 +.endm + +# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context +# struct has been initialized by GCM_INIT. +# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK +# Clobbers rax, r10-r13, and xmm0-xmm15 +.macro GCM_ENC_DEC operation + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + add %arg5, InLen(%arg2) + + xor %r11d, %r11d # initialise the data pointer offset as zero + PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation + + sub %r11, %arg5 # sub partial block data used + mov %arg5, %r13 # save the number of bytes + + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + # Encrypt/Decrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_\@ + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_\@ + je _initial_num_blocks_is_2_\@ +_initial_num_blocks_is_3_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation + sub $48, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_2_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation + sub $32, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_1_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation + sub $16, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_0_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation +_initial_blocks_\@: + + # Main loop - Encrypt/Decrypt remaining blocks + + test %r13, %r13 + je _zero_cipher_left_\@ + sub $64, %r13 + je _four_cipher_left_\@ +_crypt_by_4_\@: + GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ + %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ + %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _crypt_by_4_\@ +_four_cipher_left_\@: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_\@: + movdqu %xmm8, AadHash(%arg2) + movdqu %xmm0, CurCount(%arg2) + + mov %arg5, %r13 + and $15, %r13 # %r13 = arg5 (mod 16) + je _multiple_of_16_bytes_\@ + + mov %r13, PBlockLen(%arg2) + + # Handle the last <16 Byte block separately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqu %xmm0, CurCount(%arg2) + movdqa SHUF_MASK(%rip), %xmm10 + pshufb %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + movdqu %xmm0, PBlockEncKey(%arg2) + + cmp $16, %arg5 + jge _large_enough_update_\@ + + lea (%arg4,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + jmp _data_read_\@ + +_large_enough_update_\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + movdqu (%arg4, %r11, 1), %xmm1 + + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask + movdqu (%r12), %xmm2 + # shift right 16-r13 bytes + pshufb %xmm2, %xmm1 + +_data_read_\@: + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 + +.ifc \operation, dec + movdqa %xmm1, %xmm2 +.endif + pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) + movdqu (%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 +.ifc \operation, dec + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + pshufb %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 +.else + movdqa SHUF_MASK(%rip), %xmm10 + pshufb %xmm10,%xmm0 + + pxor %xmm0, %xmm8 +.endif + + movdqu %xmm8, AadHash(%arg2) +.ifc \operation, enc + # GHASH computation for the last <16 byte block + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm0 back to output as ciphertext + pshufb %xmm10, %xmm0 +.endif + + # Output %r13 bytes + movq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (%arg3 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + movq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + mov %al, (%arg3, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_multiple_of_16_bytes_\@: +.endm + +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + + mov PBlockLen(%arg2), %r12 + + test %r12, %r12 + je _partial_done\@ + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: + mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + mov InLen(%arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + movq %r12, %xmm1 + + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + pshufb %xmm10, %xmm8 + + movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_\@: + mov \AUTHTAG, %r10 # %r10 = authTag + mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_\@ + cmp $8, %r11 + jl _T_4_\@ +_T_8_\@: + movq %xmm0, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + psrldq $8, %xmm0 + test %r11, %r11 + je _return_T_done_\@ +_T_4_\@: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + test %r11, %r11 + je _return_T_done_\@ +_T_123_\@: + movd %xmm0, %eax + cmp $2, %r11 + jl _T_1_\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_\@ + add $2, %r10 + sar $16, %eax +_T_1_\@: + mov %al, (%r10) + jmp _return_T_done_\@ +_T_16_\@: + movdqu %xmm0, (%r10) +_return_T_done_\@: +.endm + +#ifdef __x86_64__ +/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) +* +* +* Input: A and B (128-bits each, bit-reflected) +* Output: C = A*B*x mod poly, (i.e. >>1 ) +* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input +* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. +* +*/ +.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 + movdqa \GH, \TMP1 + pshufd $78, \GH, \TMP2 + pshufd $78, \HK, \TMP3 + pxor \GH, \TMP2 # TMP2 = a1+a0 + pxor \HK, \TMP3 # TMP3 = b1+b0 + pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1 + pclmulqdq $0x00, \HK, \GH # GH = a0*b0 + pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) + pxor \GH, \TMP2 + pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \GH + pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK + + # first phase of the reduction + + movdqa \GH, \TMP2 + movdqa \GH, \TMP3 + movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + pslld $31, \TMP2 # packed right shift <<31 + pslld $30, \TMP3 # packed right shift <<30 + pslld $25, \TMP4 # packed right shift <<25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift TMP5 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \GH + + # second phase of the reduction + + movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + movdqa \GH,\TMP3 + movdqa \GH,\TMP4 + psrld $1,\TMP2 # packed left shift >>1 + psrld $2,\TMP3 # packed left shift >>2 + psrld $7,\TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \GH + pxor \TMP1, \GH # result is in TMP1 +.endm + +# Reads DLEN bytes starting at DPTR and stores in XMMDst +# where 0 < DLEN < 16 +# Clobbers %rax, DLEN and XMM1 +.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst + cmp $8, \DLEN + jl _read_lt8_\@ + mov (\DPTR), %rax + movq %rax, \XMMDst + sub $8, \DLEN + jz _done_read_partial_block_\@ + xor %eax, %eax +_read_next_byte_\@: + shl $8, %rax + mov 7(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_\@ + movq %rax, \XMM1 + pslldq $8, \XMM1 + por \XMM1, \XMMDst + jmp _done_read_partial_block_\@ +_read_lt8_\@: + xor %eax, %eax +_read_next_byte_lt8_\@: + shl $8, %rax + mov -1(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_lt8_\@ + movq %rax, \XMMDst +_done_read_partial_block_\@: +.endm + +# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. +# clobbers r10-11, xmm14 +.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ + TMP6 TMP7 + MOVADQ SHUF_MASK(%rip), %xmm14 + mov \AAD, %r10 # %r10 = AAD + mov \AADLEN, %r11 # %r11 = aadLen + pxor \TMP7, \TMP7 + pxor \TMP6, \TMP6 + + cmp $16, %r11 + jl _get_AAD_rest\@ +_get_AAD_blocks\@: + movdqu (%r10), \TMP7 + pshufb %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP7, \TMP6 + GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 + add $16, %r10 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\@ + + movdqu \TMP6, \TMP7 + + /* read the last <16B of AAD */ +_get_AAD_rest\@: + test %r11, %r11 + je _get_AAD_done\@ + + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 + pshufb %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP6, \TMP7 + GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 + movdqu \TMP7, \TMP6 + +_get_AAD_done\@: + movdqu \TMP6, AadHash(%arg2) +.endm + +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. +# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH operation + mov PBlockLen(%arg2), %r13 + test %r13, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 + + mov PBlockLen(%arg2), %r13 + +_data_read_\@: # Finished reading in data + + movdqu PBlockEncKey(%arg2), %xmm9 + movdqu HashKey(%arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + pshufb %xmm2, %xmm9 # shift right r13 bytes + +.ifc \operation, dec + movdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 + + pand %xmm1, %xmm3 + movdqa SHUF_MASK(%rip), %xmm10 + pshufb %xmm10, %xmm3 + pshufb %xmm2, %xmm3 + pxor %xmm3, \AAD_HASH + + test %r10, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax, %eax + + mov %rax, PBlockLen(%arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_dec_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) +.else + pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 + + movdqa SHUF_MASK(%rip), %xmm1 + pshufb %xmm1, %xmm9 + pshufb %xmm2, %xmm9 + pxor %xmm9, \AAD_HASH + + test %r10, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax, %eax + + mov %rax, PBlockLen(%arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_encode_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) + + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + pshufb %xmm10, %xmm9 + pshufb %xmm2, %xmm9 +.endif + # output encrypted Bytes + test %r10, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + movdqa %xmm9, %xmm0 + movq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + movq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_partial_block_done_\@: +.endm # PARTIAL_BLOCK + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3 are used as a pointer only, not modified +*/ + + +.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 + + movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 + + # start AES for num_initial_blocks blocks + + movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 + +.if (\i == 5) || (\i == 6) || (\i == 7) + + MOVADQ ONE(%RIP),\TMP1 + MOVADQ 0(%arg1),\TMP2 +.irpc index, \i_seq + paddd \TMP1, \XMM0 # INCR Y0 +.ifc \operation, dec + movdqa \XMM0, %xmm\index +.else + MOVADQ \XMM0, %xmm\index +.endif + pshufb %xmm14, %xmm\index # perform a 16 byte swap + pxor \TMP2, %xmm\index +.endr + lea 0x10(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + add $5,%eax # 128->9, 192->11, 256->13 + +aes_loop_initial_\@: + MOVADQ (%r10),\TMP1 +.irpc index, \i_seq + aesenc \TMP1, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_initial_\@ + + MOVADQ (%r10), \TMP1 +.irpc index, \i_seq + aesenclast \TMP1, %xmm\index # Last Round +.endr +.irpc index, \i_seq + movdqu (%arg4 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg3 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 + +.ifc \operation, dec + movdqa \TMP1, %xmm\index +.endif + pshufb %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\@ + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. +* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + MOVADQ ONE(%RIP),\TMP1 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM1 + pshufb %xmm14, \XMM1 # perform a 16 byte swap + + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM2 + pshufb %xmm14, \XMM2 # perform a 16 byte swap + + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM3 + pshufb %xmm14, \XMM3 # perform a 16 byte swap + + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM4 + pshufb %xmm14, \XMM4 # perform a 16 byte swap + + MOVADQ 0(%arg1),\TMP1 + pxor \TMP1, \XMM1 + pxor \TMP1, \XMM2 + pxor \TMP1, \XMM3 + pxor \TMP1, \XMM4 +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + aesenc \TMP1, \XMM1 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 +.endr +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + aesenc \TMP1, \XMM1 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 +.endr + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_pre_done\@ + +aes_loop_pre_\@: + MOVADQ (%r10),\TMP2 +.irpc index, 1234 + aesenc \TMP2, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_pre_\@ + +aes_loop_pre_done\@: + MOVADQ (%r10), \TMP2 + aesenclast \TMP2, \XMM1 + aesenclast \TMP2, \XMM2 + aesenclast \TMP2, \XMM3 + aesenclast \TMP2, \XMM4 + movdqu 16*0(%arg4 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 +.ifc \operation, dec + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM1 +.endif + movdqu 16*1(%arg4 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 +.ifc \operation, dec + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM2 +.endif + movdqu 16*2(%arg4 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 +.ifc \operation, dec + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM3 +.endif + movdqu 16*3(%arg4 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 +.ifc \operation, dec + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM4 +.else + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) +.endif + + add $64, %r11 + pshufb %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + pshufb %xmm14, \XMM2 # perform a 16 byte swap + pshufb %xmm14, \XMM3 # perform a 16 byte swap + pshufb %xmm14, \XMM4 # perform a 16 byte swap + +_initial_blocks_done\@: + +.endm + +/* +* encrypt 4 blocks at a time +* ghash the 4 previously encrypted ciphertext blocks +* arg1, %arg3, %arg4 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqu HashKey_4(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + pshufb %xmm15, \XMM1 # perform a 16 byte swap + pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + pshufb %xmm15, \XMM2 # perform a 16 byte swap + pshufb %xmm15, \XMM3 # perform a 16 byte swap + pshufb %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqu HashKey_4_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + aesenc \TMP1, \XMM1 # Round 1 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + aesenc \TMP1, \XMM1 # Round 2 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqu HashKey_3(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 3 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 4 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + movdqu HashKey_3_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 5 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqu HashKey_2(%arg2), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 6 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 7 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + movdqu HashKey_2_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 8 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqu HashKey(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 9 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_par_enc_done\@ + +aes_loop_par_enc\@: + MOVADQ (%r10),\TMP3 +.irpc index, 1234 + aesenc \TMP3, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_par_enc\@ + +aes_loop_par_enc_done\@: + MOVADQ (%r10), \TMP3 + aesenclast \TMP3, \XMM1 # Round 10 + aesenclast \TMP3, \XMM2 + aesenclast \TMP3, \XMM3 + aesenclast \TMP3, \XMM4 + movdqu HashKey_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu 16(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu 32(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu 48(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer + pshufb %xmm15, \XMM1 # perform a 16 byte swap + pshufb %xmm15, \XMM2 # perform a 16 byte swap + pshufb %xmm15, \XMM3 # perform a 16 byte swap + pshufb %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* +* decrypt 4 blocks at a time +* ghash the 4 previously decrypted ciphertext blocks +* arg1, %arg3, %arg4 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqu HashKey_4(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + pshufb %xmm15, \XMM1 # perform a 16 byte swap + pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + pshufb %xmm15, \XMM2 # perform a 16 byte swap + pshufb %xmm15, \XMM3 # perform a 16 byte swap + pshufb %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqu HashKey_4_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + aesenc \TMP1, \XMM1 # Round 1 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + aesenc \TMP1, \XMM1 # Round 2 + aesenc \TMP1, \XMM2 + aesenc \TMP1, \XMM3 + aesenc \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqu HashKey_3(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 3 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 4 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + movdqu HashKey_3_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 5 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqu HashKey_2(%arg2), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 6 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 7 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + movdqu HashKey_2_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 8 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqu HashKey(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + aesenc \TMP3, \XMM1 # Round 9 + aesenc \TMP3, \XMM2 + aesenc \TMP3, \XMM3 + aesenc \TMP3, \XMM4 + pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_par_dec_done\@ + +aes_loop_par_dec\@: + MOVADQ (%r10),\TMP3 +.irpc index, 1234 + aesenc \TMP3, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_par_dec\@ + +aes_loop_par_dec_done\@: + MOVADQ (%r10), \TMP3 + aesenclast \TMP3, \XMM1 # last round + aesenclast \TMP3, \XMM2 + aesenclast \TMP3, \XMM3 + aesenclast \TMP3, \XMM4 + movdqu HashKey_k(%arg2), \TMP5 + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM1 + movdqu 16(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM2 + movdqu 32(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM3 + movdqu 48(%arg4,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM4 + pshufb %xmm15, \XMM1 # perform a 16 byte swap + pshufb %xmm15, \XMM2 # perform a 16 byte swap + pshufb %xmm15, \XMM3 # perform a 16 byte swap + pshufb %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* GHASH the last 4 ciphertext blocks. */ +.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ +TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst + + # Multiply TMP6 * HashKey (using Karatsuba) + + movdqa \XMM1, \TMP6 + pshufd $78, \XMM1, \TMP2 + pxor \XMM1, \TMP2 + movdqu HashKey_4(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1 + pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0 + movdqu HashKey_4_k(%arg2), \TMP4 + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqa \XMM1, \XMMDst + movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM2, \TMP1 + pshufd $78, \XMM2, \TMP2 + pxor \XMM2, \TMP2 + movdqu HashKey_3(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0 + movdqu HashKey_3_k(%arg2), \TMP4 + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM2, \XMMDst + pxor \TMP2, \XMM1 +# results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM3, \TMP1 + pshufd $78, \XMM3, \TMP2 + pxor \XMM3, \TMP2 + movdqu HashKey_2(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0 + movdqu HashKey_2_k(%arg2), \TMP4 + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM3, \XMMDst + pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + movdqa \XMM4, \TMP1 + pshufd $78, \XMM4, \TMP2 + pxor \XMM4, \TMP2 + movdqu HashKey(%arg2), \TMP5 + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0 + movdqu HashKey_k(%arg2), \TMP4 + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM4, \XMMDst + pxor \XMM1, \TMP2 + pxor \TMP6, \TMP2 + pxor \XMMDst, \TMP2 + # middle section of the temp results combined as in karatsuba algorithm + movdqa \TMP2, \TMP4 + pslldq $8, \TMP4 # left shift TMP4 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP4, \XMMDst + pxor \TMP2, \TMP6 +# TMP6:XMMDst holds the result of the accumulated carry-less multiplications + # first phase of the reduction + movdqa \XMMDst, \TMP2 + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 +# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently + pslld $31, \TMP2 # packed right shifting << 31 + pslld $30, \TMP3 # packed right shifting << 30 + pslld $25, \TMP4 # packed right shifting << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP7 + psrldq $4, \TMP7 # right shift TMP7 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \XMMDst + + # second phase of the reduction + movdqa \XMMDst, \TMP2 + # make 3 copies of XMMDst for doing 3 shift operations + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 + psrld $1, \TMP2 # packed left shift >> 1 + psrld $2, \TMP3 # packed left shift >> 2 + psrld $7, \TMP4 # packed left shift >> 7 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + pxor \TMP7, \TMP2 + pxor \TMP2, \XMMDst + pxor \TMP6, \XMMDst # reduced result is in XMMDst +.endm + + +/* Encryption of a single block +* uses eax & r10 +*/ + +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + + pxor (%arg1), \XMM0 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + add $5,%eax # 128->9, 192->11, 256->13 + lea 16(%arg1), %r10 # get first expanded key address + +_esb_loop_\@: + MOVADQ (%r10),\TMP1 + aesenc \TMP1,\XMM0 + add $16,%r10 + sub $1,%eax + jnz _esb_loop_\@ + + MOVADQ (%r10),\TMP1 + aesenclast \TMP1,\XMM0 +.endm +/***************************************************************************** +* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data +* u8 *out, // Plaintext output. Encrypt in-place is allowed. +* const u8 *in, // Ciphertext input +* u64 plaintext_len, // Length of data in bytes for decryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the +* // given authentication tag and only return the plaintext if they match. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 +* // (most likely), 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the first +* set of 11 keys in the data structure void *aes_ctx +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +* +*****************************************************************************/ +SYM_FUNC_START(aesni_gcm_dec) + FUNC_SAVE + + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC dec + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_dec) + + +/***************************************************************************** +* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the +* first set of 11 keys in the data structure void *aes_ctx +* +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +***************************************************************************/ +SYM_FUNC_START(aesni_gcm_enc) + FUNC_SAVE + + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC enc + + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_enc) + +/***************************************************************************** +* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len) // Length of AAD in bytes. +*/ +SYM_FUNC_START(aesni_gcm_init) + FUNC_SAVE + GCM_INIT %arg3, %arg4,%arg5, %arg6 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_init) + +/***************************************************************************** +* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +SYM_FUNC_START(aesni_gcm_enc_update) + FUNC_SAVE + GCM_ENC_DEC enc + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_enc_update) + +/***************************************************************************** +* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +SYM_FUNC_START(aesni_gcm_dec_update) + FUNC_SAVE + GCM_ENC_DEC dec + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_dec_update) + +/***************************************************************************** +* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +*/ +SYM_FUNC_START(aesni_gcm_finalize) + FUNC_SAVE + GCM_COMPLETE %arg3 %arg4 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_finalize) + +#endif + +SYM_FUNC_START_LOCAL(_key_expansion_256a) + pshufd $0b11111111, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + movaps %xmm0, (TKEYP) + add $0x10, TKEYP + RET +SYM_FUNC_END(_key_expansion_256a) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) + +SYM_FUNC_START_LOCAL(_key_expansion_192a) + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + movaps %xmm2, %xmm6 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, %xmm1 + shufps $0b01000100, %xmm0, %xmm6 + movaps %xmm6, (TKEYP) + shufps $0b01001110, %xmm2, %xmm1 + movaps %xmm1, 0x10(TKEYP) + add $0x20, TKEYP + RET +SYM_FUNC_END(_key_expansion_192a) + +SYM_FUNC_START_LOCAL(_key_expansion_192b) + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, (TKEYP) + add $0x10, TKEYP + RET +SYM_FUNC_END(_key_expansion_192b) + +SYM_FUNC_START_LOCAL(_key_expansion_256b) + pshufd $0b10101010, %xmm1, %xmm1 + shufps $0b00010000, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + shufps $0b10001100, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + pxor %xmm1, %xmm2 + movaps %xmm2, (TKEYP) + add $0x10, TKEYP + RET +SYM_FUNC_END(_key_expansion_256b) + +/* + * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) + */ +SYM_FUNC_START(aesni_set_key) + FRAME_BEGIN +#ifndef __x86_64__ + pushl KEYP + movl (FRAME_OFFSET+8)(%esp), KEYP # ctx + movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key + movl (FRAME_OFFSET+16)(%esp), %edx # key_len +#endif + movups (UKEYP), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (KEYP) + lea 0x10(KEYP), TKEYP # key addr + movl %edx, 480(KEYP) + pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x + cmp $24, %dl + jb .Lenc_key128 + je .Lenc_key192 + movups 0x10(UKEYP), %xmm2 # other user key + movaps %xmm2, (TKEYP) + add $0x10, TKEYP + aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + call _key_expansion_256a + aeskeygenassist $0x1, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + call _key_expansion_256a + aeskeygenassist $0x2, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + call _key_expansion_256a + aeskeygenassist $0x4, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + call _key_expansion_256a + aeskeygenassist $0x8, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + call _key_expansion_256a + aeskeygenassist $0x10, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + call _key_expansion_256a + aeskeygenassist $0x20, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + call _key_expansion_256a + jmp .Ldec_key +.Lenc_key192: + movq 0x10(UKEYP), %xmm2 # other user key + aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + call _key_expansion_192a + aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + call _key_expansion_192b + aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + call _key_expansion_192a + aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + call _key_expansion_192b + aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + call _key_expansion_192a + aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + call _key_expansion_192b + aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + call _key_expansion_192a + aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 + call _key_expansion_192b + jmp .Ldec_key +.Lenc_key128: + aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 + call _key_expansion_128 + aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 + call _key_expansion_128 + aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 + call _key_expansion_128 + aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 + call _key_expansion_128 + aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 + call _key_expansion_128 + aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 + call _key_expansion_128 + aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 + call _key_expansion_128 + aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 + call _key_expansion_128 + aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 + call _key_expansion_128 + aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 + call _key_expansion_128 +.Ldec_key: + sub $0x10, TKEYP + movaps (KEYP), %xmm0 + movaps (TKEYP), %xmm1 + movaps %xmm0, 240(TKEYP) + movaps %xmm1, 240(KEYP) + add $0x10, KEYP + lea 240-16(TKEYP), UKEYP +.align 4 +.Ldec_key_loop: + movaps (KEYP), %xmm0 + aesimc %xmm0, %xmm1 + movaps %xmm1, (UKEYP) + add $0x10, KEYP + sub $0x10, UKEYP + cmp TKEYP, KEYP + jb .Ldec_key_loop + xor AREG, AREG +#ifndef __x86_64__ + popl KEYP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_set_key) + +/* + * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) + */ +SYM_FUNC_START(aesni_enc) + FRAME_BEGIN +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+12)(%esp), KEYP # ctx + movl (FRAME_OFFSET+16)(%esp), OUTP # dst + movl (FRAME_OFFSET+20)(%esp), INP # src +#endif + movl 480(KEYP), KLEN # key length + movups (INP), STATE # input + call _aesni_enc1 + movups STATE, (OUTP) # output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_enc) + +/* + * _aesni_enc1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +SYM_FUNC_START_LOCAL(_aesni_enc1) + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Lenc128 + lea 0x20(TKEYP), TKEYP + je .Lenc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + aesenc KEY, STATE + movaps -0x50(TKEYP), KEY + aesenc KEY, STATE +.align 4 +.Lenc192: + movaps -0x40(TKEYP), KEY + aesenc KEY, STATE + movaps -0x30(TKEYP), KEY + aesenc KEY, STATE +.align 4 +.Lenc128: + movaps -0x20(TKEYP), KEY + aesenc KEY, STATE + movaps -0x10(TKEYP), KEY + aesenc KEY, STATE + movaps (TKEYP), KEY + aesenc KEY, STATE + movaps 0x10(TKEYP), KEY + aesenc KEY, STATE + movaps 0x20(TKEYP), KEY + aesenc KEY, STATE + movaps 0x30(TKEYP), KEY + aesenc KEY, STATE + movaps 0x40(TKEYP), KEY + aesenc KEY, STATE + movaps 0x50(TKEYP), KEY + aesenc KEY, STATE + movaps 0x60(TKEYP), KEY + aesenc KEY, STATE + movaps 0x70(TKEYP), KEY + aesenclast KEY, STATE + RET +SYM_FUNC_END(_aesni_enc1) + +/* + * _aesni_enc4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +SYM_FUNC_START_LOCAL(_aesni_enc4) + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4enc128 + lea 0x20(TKEYP), TKEYP + je .L4enc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps -0x50(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 +#.align 4 +.L4enc192: + movaps -0x40(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps -0x30(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 +#.align 4 +.L4enc128: + movaps -0x20(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps -0x10(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps (TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x10(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x20(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x30(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x40(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x50(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x60(TKEYP), KEY + aesenc KEY, STATE1 + aesenc KEY, STATE2 + aesenc KEY, STATE3 + aesenc KEY, STATE4 + movaps 0x70(TKEYP), KEY + aesenclast KEY, STATE1 # last round + aesenclast KEY, STATE2 + aesenclast KEY, STATE3 + aesenclast KEY, STATE4 + RET +SYM_FUNC_END(_aesni_enc4) + +/* + * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) + */ +SYM_FUNC_START(aesni_dec) + FRAME_BEGIN +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+12)(%esp), KEYP # ctx + movl (FRAME_OFFSET+16)(%esp), OUTP # dst + movl (FRAME_OFFSET+20)(%esp), INP # src +#endif + mov 480(KEYP), KLEN # key length + add $240, KEYP + movups (INP), STATE # input + call _aesni_dec1 + movups STATE, (OUTP) #output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_dec) + +/* + * _aesni_dec1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +SYM_FUNC_START_LOCAL(_aesni_dec1) + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Ldec128 + lea 0x20(TKEYP), TKEYP + je .Ldec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + aesdec KEY, STATE + movaps -0x50(TKEYP), KEY + aesdec KEY, STATE +.align 4 +.Ldec192: + movaps -0x40(TKEYP), KEY + aesdec KEY, STATE + movaps -0x30(TKEYP), KEY + aesdec KEY, STATE +.align 4 +.Ldec128: + movaps -0x20(TKEYP), KEY + aesdec KEY, STATE + movaps -0x10(TKEYP), KEY + aesdec KEY, STATE + movaps (TKEYP), KEY + aesdec KEY, STATE + movaps 0x10(TKEYP), KEY + aesdec KEY, STATE + movaps 0x20(TKEYP), KEY + aesdec KEY, STATE + movaps 0x30(TKEYP), KEY + aesdec KEY, STATE + movaps 0x40(TKEYP), KEY + aesdec KEY, STATE + movaps 0x50(TKEYP), KEY + aesdec KEY, STATE + movaps 0x60(TKEYP), KEY + aesdec KEY, STATE + movaps 0x70(TKEYP), KEY + aesdeclast KEY, STATE + RET +SYM_FUNC_END(_aesni_dec1) + +/* + * _aesni_dec4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +SYM_FUNC_START_LOCAL(_aesni_dec4) + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4dec128 + lea 0x20(TKEYP), TKEYP + je .L4dec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps -0x50(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 +.align 4 +.L4dec192: + movaps -0x40(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps -0x30(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 +.align 4 +.L4dec128: + movaps -0x20(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps -0x10(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps (TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x10(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x20(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x30(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x40(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x50(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x60(TKEYP), KEY + aesdec KEY, STATE1 + aesdec KEY, STATE2 + aesdec KEY, STATE3 + aesdec KEY, STATE4 + movaps 0x70(TKEYP), KEY + aesdeclast KEY, STATE1 # last round + aesdeclast KEY, STATE2 + aesdeclast KEY, STATE3 + aesdeclast KEY, STATE4 + RET +SYM_FUNC_END(_aesni_dec4) + +/* + * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len) + */ +SYM_FUNC_START(aesni_ecb_enc) + FRAME_BEGIN +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+16)(%esp), KEYP # ctx + movl (FRAME_OFFSET+20)(%esp), OUTP # dst + movl (FRAME_OFFSET+24)(%esp), INP # src + movl (FRAME_OFFSET+28)(%esp), LEN # len +#endif + test LEN, LEN # check length + jz .Lecb_enc_ret + mov 480(KEYP), KLEN + cmp $16, LEN + jb .Lecb_enc_ret + cmp $64, LEN + jb .Lecb_enc_loop1 +.align 4 +.Lecb_enc_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_enc4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_enc_loop4 + cmp $16, LEN + jb .Lecb_enc_ret +.align 4 +.Lecb_enc_loop1: + movups (INP), STATE1 + call _aesni_enc1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_enc_loop1 +.Lecb_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_ecb_enc) + +/* + * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len); + */ +SYM_FUNC_START(aesni_ecb_dec) + FRAME_BEGIN +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+16)(%esp), KEYP # ctx + movl (FRAME_OFFSET+20)(%esp), OUTP # dst + movl (FRAME_OFFSET+24)(%esp), INP # src + movl (FRAME_OFFSET+28)(%esp), LEN # len +#endif + test LEN, LEN + jz .Lecb_dec_ret + mov 480(KEYP), KLEN + add $240, KEYP + cmp $16, LEN + jb .Lecb_dec_ret + cmp $64, LEN + jb .Lecb_dec_loop1 +.align 4 +.Lecb_dec_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_dec4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_dec_loop4 + cmp $16, LEN + jb .Lecb_dec_ret +.align 4 +.Lecb_dec_loop1: + movups (INP), STATE1 + call _aesni_dec1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_dec_loop1 +.Lecb_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_ecb_dec) + +/* + * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cbc_enc) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv +#endif + cmp $16, LEN + jb .Lcbc_enc_ret + mov 480(KEYP), KLEN + movups (IVP), STATE # load iv as initial state +.align 4 +.Lcbc_enc_loop: + movups (INP), IN # load input + pxor IN, STATE + call _aesni_enc1 + movups STATE, (OUTP) # store output + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_enc_loop + movups STATE, (IVP) +.Lcbc_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_cbc_enc) + +/* + * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cbc_dec) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv +#endif + cmp $16, LEN + jb .Lcbc_dec_just_ret + mov 480(KEYP), KLEN + add $240, KEYP + movups (IVP), IV + cmp $64, LEN + jb .Lcbc_dec_loop1 +.align 4 +.Lcbc_dec_loop4: + movups (INP), IN1 + movaps IN1, STATE1 + movups 0x10(INP), IN2 + movaps IN2, STATE2 +#ifdef __x86_64__ + movups 0x20(INP), IN3 + movaps IN3, STATE3 + movups 0x30(INP), IN4 + movaps IN4, STATE4 +#else + movups 0x20(INP), IN1 + movaps IN1, STATE3 + movups 0x30(INP), IN2 + movaps IN2, STATE4 +#endif + call _aesni_dec4 + pxor IV, STATE1 +#ifdef __x86_64__ + pxor IN1, STATE2 + pxor IN2, STATE3 + pxor IN3, STATE4 + movaps IN4, IV +#else + pxor IN1, STATE4 + movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 +#endif + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lcbc_dec_loop4 + cmp $16, LEN + jb .Lcbc_dec_ret +.align 4 +.Lcbc_dec_loop1: + movups (INP), IN + movaps IN, STATE + call _aesni_dec1 + pxor IV, STATE + movups STATE, (OUTP) + movaps IN, IV + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_dec_loop1 +.Lcbc_dec_ret: + movups IV, (IVP) +.Lcbc_dec_just_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_cbc_dec) + +/* + * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cts_cbc_enc) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + mov 480(KEYP), KLEN + movups (IVP), STATE + sub $16, LEN + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + movups (T1), %xmm4 + movups (IVP), %xmm5 + + movups (INP), IN1 + add LEN, INP + movups (INP), IN2 + + pxor IN1, STATE + call _aesni_enc1 + + pshufb %xmm5, IN2 + pxor STATE, IN2 + pshufb %xmm4, STATE + add OUTP, LEN + movups STATE, (LEN) + + movaps IN2, STATE + call _aesni_enc1 + movups STATE, (OUTP) + +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_cts_cbc_enc) + +/* + * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cts_cbc_dec) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + mov 480(KEYP), KLEN + add $240, KEYP + movups (IVP), IV + sub $16, LEN + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + movups (T1), %xmm4 + + movups (INP), STATE + add LEN, INP + movups (INP), IN1 + + call _aesni_dec1 + movaps STATE, IN2 + pshufb %xmm4, STATE + pxor IN1, STATE + + add OUTP, LEN + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + call _aesni_dec1 + + pxor IV, STATE + movups STATE, (OUTP) + +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET +SYM_FUNC_END(aesni_cts_cbc_dec) + +.pushsection .rodata +.align 16 +.Lcts_permute_table: + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 +#ifdef __x86_64__ +.Lbswap_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +#endif +.popsection + +#ifdef __x86_64__ +/* + * _aesni_inc_init: internal ABI + * setup registers used by _aesni_inc + * input: + * IV + * output: + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + */ +SYM_FUNC_START_LOCAL(_aesni_inc_init) + movaps .Lbswap_mask, BSWAP_MASK + movaps IV, CTR + pshufb BSWAP_MASK, CTR + mov $1, TCTR_LOW + movq TCTR_LOW, INC + movq CTR, TCTR_LOW + RET +SYM_FUNC_END(_aesni_inc_init) + +/* + * _aesni_inc: internal ABI + * Increase IV by 1, IV is in big endian + * input: + * IV + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + * output: + * IV: Increase by 1 + * changed: + * CTR: == output IV, in little endian + * TCTR_LOW: == lower qword of CTR + */ +SYM_FUNC_START_LOCAL(_aesni_inc) + paddq INC, CTR + add $1, TCTR_LOW + jnc .Linc_low + pslldq $8, INC + paddq INC, CTR + psrldq $8, INC +.Linc_low: + movaps CTR, IV + pshufb BSWAP_MASK, IV + RET +SYM_FUNC_END(_aesni_inc) + +/* + * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_ctr_enc) + FRAME_BEGIN + cmp $16, LEN + jb .Lctr_enc_just_ret + mov 480(KEYP), KLEN + movups (IVP), IV + call _aesni_inc_init + cmp $64, LEN + jb .Lctr_enc_loop1 +.align 4 +.Lctr_enc_loop4: + movaps IV, STATE1 + call _aesni_inc + movups (INP), IN1 + movaps IV, STATE2 + call _aesni_inc + movups 0x10(INP), IN2 + movaps IV, STATE3 + call _aesni_inc + movups 0x20(INP), IN3 + movaps IV, STATE4 + call _aesni_inc + movups 0x30(INP), IN4 + call _aesni_enc4 + pxor IN1, STATE1 + movups STATE1, (OUTP) + pxor IN2, STATE2 + movups STATE2, 0x10(OUTP) + pxor IN3, STATE3 + movups STATE3, 0x20(OUTP) + pxor IN4, STATE4 + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lctr_enc_loop4 + cmp $16, LEN + jb .Lctr_enc_ret +.align 4 +.Lctr_enc_loop1: + movaps IV, STATE + call _aesni_inc + movups (INP), IN + call _aesni_enc1 + pxor IN, STATE + movups STATE, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lctr_enc_loop1 +.Lctr_enc_ret: + movups IV, (IVP) +.Lctr_enc_just_ret: + FRAME_END + RET +SYM_FUNC_END(aesni_ctr_enc) + +#endif + +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 +.align 16 +.Lgf128mul_x_ble_mask: + .octa 0x00000000000000010000000000000087 +.previous + +/* + * _aesni_gf128mul_x_ble: internal ABI + * Multiply in GF(2^128) for XTS IVs + * input: + * IV: current IV + * GF128MUL_MASK == mask with 0x87 and 0x01 + * output: + * IV: next IV + * changed: + * CTR: == temporary value + */ +#define _aesni_gf128mul_x_ble() \ + pshufd $0x13, IV, KEY; \ + paddq IV, IV; \ + psrad $31, KEY; \ + pand GF128MUL_MASK, KEY; \ + pxor KEY, IV; + +/* + * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_encrypt) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif + movups (IVP), IV + + mov 480(KEYP), KLEN + +.Lxts_enc_loop4: + sub $64, LEN + jl .Lxts_enc_1x + + movdqa IV, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 + movdqu IV, 0x00(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 + movdqu IV, 0x10(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 + movdqu IV, 0x20(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 + movdqu IV, 0x30(OUTP) + + call _aesni_enc4 + + movdqu 0x00(OUTP), IN + pxor IN, STATE1 + movdqu STATE1, 0x00(OUTP) + + movdqu 0x10(OUTP), IN + pxor IN, STATE2 + movdqu STATE2, 0x10(OUTP) + + movdqu 0x20(OUTP), IN + pxor IN, STATE3 + movdqu STATE3, 0x20(OUTP) + + movdqu 0x30(OUTP), IN + pxor IN, STATE4 + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() + + add $64, INP + add $64, OUTP + test LEN, LEN + jnz .Lxts_enc_loop4 + +.Lxts_enc_ret_iv: + movups IV, (IVP) + +.Lxts_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET + +.Lxts_enc_1x: + add $64, LEN + jz .Lxts_enc_ret_iv + sub $16, LEN + jl .Lxts_enc_cts4 + +.Lxts_enc_loop1: + movdqu (INP), STATE + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_enc_out + + add $16, INP + sub $16, LEN + jl .Lxts_enc_cts1 + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_enc_loop1 + +.Lxts_enc_out: + movdqu STATE, (OUTP) + jmp .Lxts_enc_ret_iv + +.Lxts_enc_cts4: + movdqa STATE4, STATE + sub $16, OUTP + +.Lxts_enc_cts1: +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + + movups STATE, (OUTP) + jmp .Lxts_enc_ret +SYM_FUNC_END(aesni_xts_encrypt) + +/* + * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_decrypt) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif + movups (IVP), IV + + mov 480(KEYP), KLEN + add $240, KEYP + + test $15, LEN + jz .Lxts_dec_loop4 + sub $16, LEN + +.Lxts_dec_loop4: + sub $64, LEN + jl .Lxts_dec_1x + + movdqa IV, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 + movdqu IV, 0x00(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 + movdqu IV, 0x10(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 + movdqu IV, 0x20(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 + movdqu IV, 0x30(OUTP) + + call _aesni_dec4 + + movdqu 0x00(OUTP), IN + pxor IN, STATE1 + movdqu STATE1, 0x00(OUTP) + + movdqu 0x10(OUTP), IN + pxor IN, STATE2 + movdqu STATE2, 0x10(OUTP) + + movdqu 0x20(OUTP), IN + pxor IN, STATE3 + movdqu STATE3, 0x20(OUTP) + + movdqu 0x30(OUTP), IN + pxor IN, STATE4 + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() + + add $64, INP + add $64, OUTP + test LEN, LEN + jnz .Lxts_dec_loop4 + +.Lxts_dec_ret_iv: + movups IV, (IVP) + +.Lxts_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + RET + +.Lxts_dec_1x: + add $64, LEN + jz .Lxts_dec_ret_iv + +.Lxts_dec_loop1: + movdqu (INP), STATE + + add $16, INP + sub $16, LEN + jl .Lxts_dec_cts1 + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_dec_out + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_dec_loop1 + +.Lxts_dec_out: + movdqu STATE, (OUTP) + jmp .Lxts_dec_ret_iv + +.Lxts_dec_cts1: + movdqa IV, STATE4 + _aesni_gf128mul_x_ble() + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor STATE4, STATE + call _aesni_dec1 + pxor STATE4, STATE + + movups STATE, (OUTP) + jmp .Lxts_dec_ret +SYM_FUNC_END(aesni_xts_decrypt) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S new file mode 100644 index 000000000..0852ab573 --- /dev/null +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -0,0 +1,2826 @@ +######################################################################## +# Copyright (c) 2013, Intel Corporation +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR +# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## +## +## Authors: +## Erdinc Ozturk +## Vinodh Gopal +## James Guilford +## Tim Chen +## +## References: +## This code was derived and highly optimized from the code described in paper: +## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation +## on Intel Architecture Processors. August, 2010 +## The details of the implementation is explained in: +## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode +## on Intel Architecture Processors. October, 2012. +## +## Assumptions: +## +## +## +## iv: +## 0 1 2 3 +## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | Salt (From the SA) | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | Initialization Vector | +## | (This is the sequence number from IPSec header) | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | 0x1 | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## +## +## +## AAD: +## AAD padded to 128 bits with 0 +## for example, assume AAD is a u32 vector +## +## if AAD is 8 bytes: +## AAD[3] = {A0, A1}# +## padded AAD in xmm register = {A1 A0 0 0} +## +## 0 1 2 3 +## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | SPI (A1) | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | 32-bit Sequence Number (A0) | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | 0x0 | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## +## AAD Format with 32-bit Sequence Number +## +## if AAD is 12 bytes: +## AAD[3] = {A0, A1, A2}# +## padded AAD in xmm register = {A2 A1 A0 0} +## +## 0 1 2 3 +## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | SPI (A2) | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | 64-bit Extended Sequence Number {A1,A0} | +## | | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## | 0x0 | +## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +## +## AAD Format with 64-bit Extended Sequence Number +## +## +## aadLen: +## from the definition of the spec, aadLen can only be 8 or 12 bytes. +## The code additionally supports aadLen of length 16 bytes. +## +## TLen: +## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +## +## poly = x^128 + x^127 + x^126 + x^121 + 1 +## throughout the code, one tab and two tab indentations are used. one tab is +## for GHASH part, two tabs is for AES part. +## + +#include + +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 +POLY: .octa 0xC2000000000000000000000000000001 + +.section .rodata.cst16.POLY2, "aM", @progbits, 16 +.align 16 +POLY2: .octa 0xC20000000000000000000001C2000000 + +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 +TWOONE: .octa 0x00000001000000000000000000000001 + +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 +SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F + +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 +ONE: .octa 0x00000000000000000000000000000001 + +.section .rodata.cst16.ONEf, "aM", @progbits, 16 +.align 16 +ONEf: .octa 0x01000000000000000000000000000000 + +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + +.section .rodata +.align 16 +.type aad_shift_arr, @object +.size aad_shift_arr, 272 +aad_shift_arr: + .octa 0xffffffffffffffffffffffffffffffff + .octa 0xffffffffffffffffffffffffffffff0C + .octa 0xffffffffffffffffffffffffffff0D0C + .octa 0xffffffffffffffffffffffffff0E0D0C + .octa 0xffffffffffffffffffffffff0F0E0D0C + .octa 0xffffffffffffffffffffff0C0B0A0908 + .octa 0xffffffffffffffffffff0D0C0B0A0908 + .octa 0xffffffffffffffffff0E0D0C0B0A0908 + .octa 0xffffffffffffffff0F0E0D0C0B0A0908 + .octa 0xffffffffffffff0C0B0A090807060504 + .octa 0xffffffffffff0D0C0B0A090807060504 + .octa 0xffffffffff0E0D0C0B0A090807060504 + .octa 0xffffffff0F0E0D0C0B0A090807060504 + .octa 0xffffff0C0B0A09080706050403020100 + .octa 0xffff0D0C0B0A09080706050403020100 + .octa 0xff0E0D0C0B0A09080706050403020100 + .octa 0x0F0E0D0C0B0A09080706050403020100 + + +.text + + +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 + +HashKey = 16*6 # store HashKey <<1 mod poly here +HashKey_2 = 16*7 # store HashKey^2 <<1 mod poly here +HashKey_3 = 16*8 # store HashKey^3 <<1 mod poly here +HashKey_4 = 16*9 # store HashKey^4 <<1 mod poly here +HashKey_5 = 16*10 # store HashKey^5 <<1 mod poly here +HashKey_6 = 16*11 # store HashKey^6 <<1 mod poly here +HashKey_7 = 16*12 # store HashKey^7 <<1 mod poly here +HashKey_8 = 16*13 # store HashKey^8 <<1 mod poly here +HashKey_k = 16*14 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) +HashKey_2_k = 16*15 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) +HashKey_3_k = 16*16 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) +HashKey_4_k = 16*17 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) +HashKey_5_k = 16*18 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) +HashKey_6_k = 16*19 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) +HashKey_7_k = 16*20 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) +HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) + +#define arg1 %rdi +#define arg2 %rsi +#define arg3 %rdx +#define arg4 %rcx +#define arg5 %r8 +#define arg6 %r9 +#define keysize 2*15*16(arg1) + +i = 0 +j = 0 + +out_order = 0 +in_order = 1 +DEC = 0 +ENC = 1 + +.macro define_reg r n +reg_\r = %xmm\n +.endm + +.macro setreg +.altmacro +define_reg i %i +define_reg j %j +.noaltmacro +.endm + +TMP1 = 16*0 # Temporary storage for AAD +TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) +TMP3 = 16*2 # Temporary storage for AES State 3 +TMP4 = 16*3 # Temporary storage for AES State 4 +TMP5 = 16*4 # Temporary storage for AES State 5 +TMP6 = 16*5 # Temporary storage for AES State 6 +TMP7 = 16*6 # Temporary storage for AES State 7 +TMP8 = 16*7 # Temporary storage for AES State 8 + +VARIABLE_OFFSET = 16*8 + +################################ +# Utility Macros +################################ + +.macro FUNC_SAVE + push %r12 + push %r13 + push %r15 + + push %rbp + mov %rsp, %rbp + + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp # align rsp to 64 bytes +.endm + +.macro FUNC_RESTORE + mov %rbp, %rsp + pop %rbp + + pop %r15 + pop %r13 + pop %r12 +.endm + +# Encryption of a single block +.macro ENCRYPT_SINGLE_BLOCK REP XMM0 + vpxor (arg1), \XMM0, \XMM0 + i = 1 + setreg +.rep \REP + vaesenc 16*i(arg1), \XMM0, \XMM0 + i = (i+1) + setreg +.endr + vaesenclast 16*i(arg1), \XMM0, \XMM0 +.endm + +# combined for GCM encrypt and decrypt functions +# clobbering all xmm registers +# clobbering r10, r11, r12, r13, r15, rax +.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP + vmovdqu AadHash(arg2), %xmm8 + vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey + add arg5, InLen(arg2) + + # initialize the data pointer offset as zero + xor %r11d, %r11d + + PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC + sub %r11, arg5 + + mov arg5, %r13 # save the number of bytes of plaintext/ciphertext + and $-16, %r13 # r13 = r13 - (r13 mod 16) + + mov %r13, %r12 + shr $4, %r12 + and $7, %r12 + jz _initial_num_blocks_is_0\@ + + cmp $7, %r12 + je _initial_num_blocks_is_7\@ + cmp $6, %r12 + je _initial_num_blocks_is_6\@ + cmp $5, %r12 + je _initial_num_blocks_is_5\@ + cmp $4, %r12 + je _initial_num_blocks_is_4\@ + cmp $3, %r12 + je _initial_num_blocks_is_3\@ + cmp $2, %r12 + je _initial_num_blocks_is_2\@ + + jmp _initial_num_blocks_is_1\@ + +_initial_num_blocks_is_7\@: + \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*7, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_6\@: + \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*6, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_5\@: + \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*5, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_4\@: + \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*4, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_3\@: + \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*3, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_2\@: + \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*2, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_1\@: + \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*1, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_0\@: + \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + + +_initial_blocks_encrypted\@: + test %r13, %r13 + je _zero_cipher_left\@ + + sub $128, %r13 + je _eight_cipher_left\@ + + + + + vmovd %xmm9, %r15d + and $255, %r15d + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + +_encrypt_by_8_new\@: + cmp $(255-8), %r15d + jg _encrypt_by_8\@ + + + + add $8, %r15b + \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC + add $128, %r11 + sub $128, %r13 + jne _encrypt_by_8_new\@ + + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + jmp _eight_cipher_left\@ + +_encrypt_by_8\@: + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + add $8, %r15b + \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + add $128, %r11 + sub $128, %r13 + jne _encrypt_by_8_new\@ + + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + + + +_eight_cipher_left\@: + \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 + + +_zero_cipher_left\@: + vmovdqu %xmm14, AadHash(arg2) + vmovdqu %xmm9, CurCount(arg2) + + # check for 0 length + mov arg5, %r13 + and $15, %r13 # r13 = (arg5 mod 16) + + je _multiple_of_16_bytes\@ + + # handle the last <16 Byte block separately + + mov %r13, PBlockLen(arg2) + + vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn + vmovdqu %xmm9, CurCount(arg2) + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn) + vmovdqu %xmm9, PBlockEncKey(arg2) + + cmp $16, arg5 + jge _large_enough_update\@ + + lea (arg4,%r11,1), %r10 + mov %r13, %r12 + + READ_PARTIAL_BLOCK %r10 %r12 %xmm1 + + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 # adjust the shuffle mask pointer to be + # able to shift 16-r13 bytes (r13 is the + # number of bytes in plaintext mod 16) + + jmp _final_ghash_mul\@ + +_large_enough_update\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + vmovdqu (arg4, %r11, 1), %xmm1 + + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask + vmovdqu (%r12), %xmm2 + # shift right 16-r13 bytes + vpshufb %xmm2, %xmm1, %xmm1 + +_final_ghash_mul\@: + .if \ENC_DEC == DEC + vmovdqa %xmm1, %xmm2 + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to + # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm2, %xmm2 + vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 + vpxor %xmm2, %xmm14, %xmm14 + + vmovdqu %xmm14, AadHash(arg2) + .else + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to + # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + vpxor %xmm9, %xmm14, %xmm14 + + vmovdqu %xmm14, AadHash(arg2) + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext + .endif + + + ############################# + # output r13 Bytes + vmovq %xmm9, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left\@ + + mov %rax, (arg3 , %r11) + add $8, %r11 + vpsrldq $8, %xmm9, %xmm9 + vmovq %xmm9, %rax + sub $8, %r13 + +_less_than_8_bytes_left\@: + movb %al, (arg3 , %r11) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left\@ + ############################# + +_multiple_of_16_bytes\@: +.endm + + +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN + vmovdqu AadHash(arg2), %xmm14 + vmovdqu HashKey(arg2), %xmm13 + + mov PBlockLen(arg2), %r12 + test %r12, %r12 + je _partial_done\@ + + #GHASH computation for the last <16 Byte block + \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: + mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + vmovd %r12d, %xmm15 # len(A) in xmm15 + + mov InLen(arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + vmovq %r12, %xmm1 + vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 + vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) + + vpxor %xmm15, %xmm14, %xmm14 + \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation + vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap + + vmovdqu OrigIV(arg2), %xmm9 + + ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0) + + vpxor %xmm14, %xmm9, %xmm9 + + + +_return_T\@: + mov \AUTH_TAG, %r10 # r10 = authTag + mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len + + cmp $16, %r11 + je _T_16\@ + + cmp $8, %r11 + jl _T_4\@ + +_T_8\@: + vmovq %xmm9, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + vpsrldq $8, %xmm9, %xmm9 + test %r11, %r11 + je _return_T_done\@ +_T_4\@: + vmovd %xmm9, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + vpsrldq $4, %xmm9, %xmm9 + test %r11, %r11 + je _return_T_done\@ +_T_123\@: + vmovd %xmm9, %eax + cmp $2, %r11 + jl _T_1\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done\@ + add $2, %r10 + sar $16, %eax +_T_1\@: + mov %al, (%r10) + jmp _return_T_done\@ + +_T_16\@: + vmovdqu %xmm9, (%r10) + +_return_T_done\@: +.endm + +.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8 + + mov \AAD, %r10 # r10 = AAD + mov \AADLEN, %r12 # r12 = aadLen + + + mov %r12, %r11 + + vpxor \T8, \T8, \T8 + vpxor \T7, \T7, \T7 + cmp $16, %r11 + jl _get_AAD_rest8\@ +_get_AAD_blocks\@: + vmovdqu (%r10), \T7 + vpshufb SHUF_MASK(%rip), \T7, \T7 + vpxor \T7, \T8, \T8 + \GHASH_MUL \T8, \T2, \T1, \T3, \T4, \T5, \T6 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\@ + vmovdqu \T8, \T7 + test %r11, %r11 + je _get_AAD_done\@ + + vpxor \T7, \T7, \T7 + + /* read the last <16B of AAD. since we have at least 4B of + data right after the AAD (the ICV, and maybe some CT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\@: + cmp $4, %r11 + jle _get_AAD_rest4\@ + movq (%r10), \T1 + add $8, %r10 + sub $8, %r11 + vpslldq $8, \T1, \T1 + vpsrldq $8, \T7, \T7 + vpxor \T1, \T7, \T7 + jmp _get_AAD_rest8\@ +_get_AAD_rest4\@: + test %r11, %r11 + jle _get_AAD_rest0\@ + mov (%r10), %eax + movq %rax, \T1 + add $4, %r10 + sub $4, %r11 + vpslldq $12, \T1, \T1 + vpsrldq $4, \T7, \T7 + vpxor \T1, \T7, \T7 +_get_AAD_rest0\@: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + vmovdqu aad_shift_arr(%r11), \T1 + vpshufb \T1, \T7, \T7 +_get_AAD_rest_final\@: + vpshufb SHUF_MASK(%rip), \T7, \T7 + vpxor \T8, \T7, \T7 + \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6 + +_get_AAD_done\@: + vmovdqu \T7, AadHash(arg2) +.endm + +.macro INIT GHASH_MUL PRECOMPUTE + mov arg6, %r11 + mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length + xor %r11d, %r11d + mov %r11, InLen(arg2) # ctx_data.in_length = 0 + + mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0 + mov arg3, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv + + vpshufb SHUF_MASK(%rip), %xmm0, %xmm0 + movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv + + vmovdqu (arg4), %xmm6 # xmm6 = HashKey + + vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 + ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey + vmovdqa %xmm6, %xmm2 + vpsllq $1, %xmm6, %xmm6 + vpsrlq $63, %xmm2, %xmm2 + vmovdqa %xmm2, %xmm1 + vpslldq $8, %xmm2, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + #reduction + vpshufd $0b00100100, %xmm1, %xmm2 + vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 + vpand POLY(%rip), %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly + ####################################################################### + vmovdqu %xmm6, HashKey(arg2) # store HashKey<<1 mod poly + + CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0 + + \PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 +.endm + + +# Reads DLEN bytes starting at DPTR and stores in XMMDst +# where 0 < DLEN < 16 +# Clobbers %rax, DLEN +.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst + vpxor \XMMDst, \XMMDst, \XMMDst + + cmp $8, \DLEN + jl _read_lt8_\@ + mov (\DPTR), %rax + vpinsrq $0, %rax, \XMMDst, \XMMDst + sub $8, \DLEN + jz _done_read_partial_block_\@ + xor %eax, %eax +_read_next_byte_\@: + shl $8, %rax + mov 7(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_\@ + vpinsrq $1, %rax, \XMMDst, \XMMDst + jmp _done_read_partial_block_\@ +_read_lt8_\@: + xor %eax, %eax +_read_next_byte_lt8_\@: + shl $8, %rax + mov -1(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_lt8_\@ + vpinsrq $0, %rax, \XMMDst, \XMMDst +_done_read_partial_block_\@: +.endm + +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. +# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH ENC_DEC + mov PBlockLen(arg2), %r13 + test %r13, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm1 + + mov PBlockLen(arg2), %r13 + +_data_read_\@: # Finished reading in data + + vmovdqu PBlockEncKey(arg2), %xmm9 + vmovdqu HashKey(arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask + vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes + +.if \ENC_DEC == DEC + vmovdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out bottom r13 bytes of xmm9 + + vpand %xmm1, %xmm3, %xmm3 + vmovdqa SHUF_MASK(%rip), %xmm10 + vpshufb %xmm10, %xmm3, %xmm3 + vpshufb %xmm2, %xmm3, %xmm3 + vpxor %xmm3, \AAD_HASH, \AAD_HASH + + test %r10, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax,%eax + + mov %rax, PBlockLen(arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(arg2) +_dec_done_\@: + vmovdqu \AAD_HASH, AadHash(arg2) +.else + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 + + vmovdqa SHUF_MASK(%rip), %xmm1 + vpshufb %xmm1, %xmm9, %xmm9 + vpshufb %xmm2, %xmm9, %xmm9 + vpxor %xmm9, \AAD_HASH, \AAD_HASH + + test %r10, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax,%eax + + mov %rax, PBlockLen(arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(arg2) +_encode_done_\@: + vmovdqu \AAD_HASH, AadHash(arg2) + + vmovdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + vpshufb %xmm10, %xmm9, %xmm9 + vpshufb %xmm2, %xmm9, %xmm9 +.endif + # output encrypted Bytes + test %r10, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + vmovdqa %xmm9, %xmm0 + vmovq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + vmovq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_partial_block_done_\@: +.endm # PARTIAL_BLOCK + +############################################################################### +# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) +# Input: A and B (128-bits each, bit-reflected) +# Output: C = A*B*x mod poly, (i.e. >>1 ) +# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input +# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. +############################################################################### +.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 + + vpshufd $0b01001110, \GH, \T2 + vpshufd $0b01001110, \HK, \T3 + vpxor \GH , \T2, \T2 # T2 = (a1+a0) + vpxor \HK , \T3, \T3 # T3 = (b1+b0) + + vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 + vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 + vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) + vpxor \GH, \T2,\T2 + vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 + + vpslldq $8, \T2,\T3 # shift-L T3 2 DWs + vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs + vpxor \T3, \GH, \GH + vpxor \T2, \T1, \T1 # = GH x HK + + #first phase of the reduction + vpslld $31, \GH, \T2 # packed right shifting << 31 + vpslld $30, \GH, \T3 # packed right shifting shift << 30 + vpslld $25, \GH, \T4 # packed right shifting shift << 25 + + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpsrldq $4, \T2, \T5 # shift-R T5 1 DW + + vpslldq $12, \T2, \T2 # shift-L T2 3 DWs + vpxor \T2, \GH, \GH # first phase of the reduction complete + + #second phase of the reduction + + vpsrld $1,\GH, \T2 # packed left shifting >> 1 + vpsrld $2,\GH, \T3 # packed left shifting >> 2 + vpsrld $7,\GH, \T4 # packed left shifting >> 7 + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpxor \T5, \T2, \T2 + vpxor \T2, \GH, \GH + vpxor \T1, \GH, \GH # the result is in GH + + +.endm + +.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 + + # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + vmovdqa \HK, \T5 + + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly + vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_2_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly + vmovdqu \T5, HashKey_3(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_3_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly + vmovdqu \T5, HashKey_4(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_4_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly + vmovdqu \T5, HashKey_5(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_5_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly + vmovdqu \T5, HashKey_6(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_6_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly + vmovdqu \T5, HashKey_7(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_7_k(arg2) + + GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly + vmovdqu \T5, HashKey_8(arg2) + vpshufd $0b01001110, \T5, \T1 + vpxor \T5, \T1, \T1 + vmovdqu \T1, HashKey_8_k(arg2) + +.endm + +## if a = number of total plaintext bytes +## b = floor(a/16) +## num_initial_blocks = b mod 4# +## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext +## r10, r11, r12, rax are clobbered +## arg1, arg2, arg3, arg4 are used as pointers only, not modified + +.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC + i = (8-\num_initial_blocks) + setreg + vmovdqu AadHash(arg2), reg_i + + # start AES for num_initial_blocks blocks + vmovdqu CurCount(arg2), \CTR + + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, reg_i + vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap + i = (i+1) + setreg +.endr + + vmovdqa (arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vpxor \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + j = 1 + setreg +.rep \REP + vmovdqa 16*j(arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vaesenc \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + j = (j+1) + setreg +.endr + + vmovdqa 16*j(arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vaesenclast \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vmovdqu (arg4, %r11), \T1 + vpxor \T1, reg_i, reg_i + vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for num_initial_blocks blocks + add $16, %r11 +.if \ENC_DEC == DEC + vmovdqa \T1, reg_i +.endif + vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations + i = (i+1) + setreg +.endr + + + i = (8-\num_initial_blocks) + j = (9-\num_initial_blocks) + setreg + +.rep \num_initial_blocks + vpxor reg_i, reg_j, reg_j + GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks + i = (i+1) + j = (j+1) + setreg +.endr + # XMM8 has the combined result here + + vmovdqa \XMM8, TMP1(%rsp) + vmovdqa \XMM8, \T3 + + cmp $128, %r13 + jl _initial_blocks_done\@ # no need for precomputed constants + +############################################################################### +# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM1 + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM2 + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM3 + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM4 + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM5 + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM6 + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM7 + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM8 + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + + vmovdqa (arg1), \T_key + vpxor \T_key, \XMM1, \XMM1 + vpxor \T_key, \XMM2, \XMM2 + vpxor \T_key, \XMM3, \XMM3 + vpxor \T_key, \XMM4, \XMM4 + vpxor \T_key, \XMM5, \XMM5 + vpxor \T_key, \XMM6, \XMM6 + vpxor \T_key, \XMM7, \XMM7 + vpxor \T_key, \XMM8, \XMM8 + + i = 1 + setreg +.rep \REP # do REP rounds + vmovdqa 16*i(arg1), \T_key + vaesenc \T_key, \XMM1, \XMM1 + vaesenc \T_key, \XMM2, \XMM2 + vaesenc \T_key, \XMM3, \XMM3 + vaesenc \T_key, \XMM4, \XMM4 + vaesenc \T_key, \XMM5, \XMM5 + vaesenc \T_key, \XMM6, \XMM6 + vaesenc \T_key, \XMM7, \XMM7 + vaesenc \T_key, \XMM8, \XMM8 + i = (i+1) + setreg +.endr + + vmovdqa 16*i(arg1), \T_key + vaesenclast \T_key, \XMM1, \XMM1 + vaesenclast \T_key, \XMM2, \XMM2 + vaesenclast \T_key, \XMM3, \XMM3 + vaesenclast \T_key, \XMM4, \XMM4 + vaesenclast \T_key, \XMM5, \XMM5 + vaesenclast \T_key, \XMM6, \XMM6 + vaesenclast \T_key, \XMM7, \XMM7 + vaesenclast \T_key, \XMM8, \XMM8 + + vmovdqu (arg4, %r11), \T1 + vpxor \T1, \XMM1, \XMM1 + vmovdqu \XMM1, (arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM1 + .endif + + vmovdqu 16*1(arg4, %r11), \T1 + vpxor \T1, \XMM2, \XMM2 + vmovdqu \XMM2, 16*1(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM2 + .endif + + vmovdqu 16*2(arg4, %r11), \T1 + vpxor \T1, \XMM3, \XMM3 + vmovdqu \XMM3, 16*2(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM3 + .endif + + vmovdqu 16*3(arg4, %r11), \T1 + vpxor \T1, \XMM4, \XMM4 + vmovdqu \XMM4, 16*3(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM4 + .endif + + vmovdqu 16*4(arg4, %r11), \T1 + vpxor \T1, \XMM5, \XMM5 + vmovdqu \XMM5, 16*4(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM5 + .endif + + vmovdqu 16*5(arg4, %r11), \T1 + vpxor \T1, \XMM6, \XMM6 + vmovdqu \XMM6, 16*5(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM6 + .endif + + vmovdqu 16*6(arg4, %r11), \T1 + vpxor \T1, \XMM7, \XMM7 + vmovdqu \XMM7, 16*6(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM7 + .endif + + vmovdqu 16*7(arg4, %r11), \T1 + vpxor \T1, \XMM8, \XMM8 + vmovdqu \XMM8, 16*7(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM8 + .endif + + add $128, %r11 + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + +############################################################################### + +_initial_blocks_done\@: + +.endm + +# encrypt 8 blocks at a time +# ghash the 8 previously encrypted ciphertext blocks +# arg1, arg2, arg3, arg4 are used as pointers only, not modified +# r11 is the data offset value +.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC + + vmovdqa \XMM1, \T2 + vmovdqa \XMM2, TMP2(%rsp) + vmovdqa \XMM3, TMP3(%rsp) + vmovdqa \XMM4, TMP4(%rsp) + vmovdqa \XMM5, TMP5(%rsp) + vmovdqa \XMM6, TMP6(%rsp) + vmovdqa \XMM7, TMP7(%rsp) + vmovdqa \XMM8, TMP8(%rsp) + +.if \loop_idx == in_order + vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT + vpaddd ONE(%rip), \XMM1, \XMM2 + vpaddd ONE(%rip), \XMM2, \XMM3 + vpaddd ONE(%rip), \XMM3, \XMM4 + vpaddd ONE(%rip), \XMM4, \XMM5 + vpaddd ONE(%rip), \XMM5, \XMM6 + vpaddd ONE(%rip), \XMM6, \XMM7 + vpaddd ONE(%rip), \XMM7, \XMM8 + vmovdqa \XMM8, \CTR + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap +.else + vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT + vpaddd ONEf(%rip), \XMM1, \XMM2 + vpaddd ONEf(%rip), \XMM2, \XMM3 + vpaddd ONEf(%rip), \XMM3, \XMM4 + vpaddd ONEf(%rip), \XMM4, \XMM5 + vpaddd ONEf(%rip), \XMM5, \XMM6 + vpaddd ONEf(%rip), \XMM6, \XMM7 + vpaddd ONEf(%rip), \XMM7, \XMM8 + vmovdqa \XMM8, \CTR +.endif + + + ####################################################################### + + vmovdqu (arg1), \T1 + vpxor \T1, \XMM1, \XMM1 + vpxor \T1, \XMM2, \XMM2 + vpxor \T1, \XMM3, \XMM3 + vpxor \T1, \XMM4, \XMM4 + vpxor \T1, \XMM5, \XMM5 + vpxor \T1, \XMM6, \XMM6 + vpxor \T1, \XMM7, \XMM7 + vpxor \T1, \XMM8, \XMM8 + + ####################################################################### + + + + + + vmovdqu 16*1(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqu 16*2(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + + ####################################################################### + + vmovdqu HashKey_8(arg2), \T5 + vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 + vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 + + vpshufd $0b01001110, \T2, \T6 + vpxor \T2, \T6, \T6 + + vmovdqu HashKey_8_k(arg2), \T5 + vpclmulqdq $0x00, \T5, \T6, \T6 + + vmovdqu 16*3(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP2(%rsp), \T1 + vmovdqu HashKey_7(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_7_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*4(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + ####################################################################### + + vmovdqa TMP3(%rsp), \T1 + vmovdqu HashKey_6(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_6_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*5(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP4(%rsp), \T1 + vmovdqu HashKey_5(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_5_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*6(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + + vmovdqa TMP5(%rsp), \T1 + vmovdqu HashKey_4(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_4_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*7(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP6(%rsp), \T1 + vmovdqu HashKey_3(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_3_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + + vmovdqu 16*8(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP7(%rsp), \T1 + vmovdqu HashKey_2(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_2_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + ####################################################################### + + vmovdqu 16*9(arg1), \T5 + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqa TMP8(%rsp), \T1 + vmovdqu HashKey(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpshufd $0b01001110, \T1, \T3 + vpxor \T1, \T3, \T3 + vmovdqu HashKey_k(arg2), \T5 + vpclmulqdq $0x10, \T5, \T3, \T3 + vpxor \T3, \T6, \T6 + + vpxor \T4, \T6, \T6 + vpxor \T7, \T6, \T6 + + vmovdqu 16*10(arg1), \T5 + + i = 11 + setreg +.rep (\REP-9) + + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqu 16*i(arg1), \T5 + i = i + 1 + setreg +.endr + + i = 0 + j = 1 + setreg +.rep 8 + vpxor 16*i(arg4, %r11), \T5, \T2 + .if \ENC_DEC == ENC + vaesenclast \T2, reg_j, reg_j + .else + vaesenclast \T2, reg_j, \T3 + vmovdqu 16*i(arg4, %r11), reg_j + vmovdqu \T3, 16*i(arg3, %r11) + .endif + i = (i+1) + j = (j+1) + setreg +.endr + ####################################################################### + + + vpslldq $8, \T6, \T3 # shift-L T3 2 DWs + vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs + vpxor \T3, \T7, \T7 + vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 + + + + ####################################################################### + #first phase of the reduction + ####################################################################### + vpslld $31, \T7, \T2 # packed right shifting << 31 + vpslld $30, \T7, \T3 # packed right shifting shift << 30 + vpslld $25, \T7, \T4 # packed right shifting shift << 25 + + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpsrldq $4, \T2, \T1 # shift-R T1 1 DW + + vpslldq $12, \T2, \T2 # shift-L T2 3 DWs + vpxor \T2, \T7, \T7 # first phase of the reduction complete + ####################################################################### + .if \ENC_DEC == ENC + vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer + .endif + + ####################################################################### + #second phase of the reduction + vpsrld $1, \T7, \T2 # packed left shifting >> 1 + vpsrld $2, \T7, \T3 # packed left shifting >> 2 + vpsrld $7, \T7, \T4 # packed left shifting >> 7 + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpxor \T1, \T2, \T2 + vpxor \T2, \T7, \T7 + vpxor \T7, \T6, \T6 # the result is in T6 + ####################################################################### + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + + + vpxor \T6, \XMM1, \XMM1 + + + +.endm + + +# GHASH the last 4 ciphertext blocks. +.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 + + ## Karatsuba Method + + + vpshufd $0b01001110, \XMM1, \T2 + vpxor \XMM1, \T2, \T2 + vmovdqu HashKey_8(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM1, \T6 + vpclmulqdq $0x00, \T5, \XMM1, \T7 + + vmovdqu HashKey_8_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM2, \T2 + vpxor \XMM2, \T2, \T2 + vmovdqu HashKey_7(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM2, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM2, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_7_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM3, \T2 + vpxor \XMM3, \T2, \T2 + vmovdqu HashKey_6(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM3, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM3, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_6_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM4, \T2 + vpxor \XMM4, \T2, \T2 + vmovdqu HashKey_5(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM4, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM4, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_5_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM5, \T2 + vpxor \XMM5, \T2, \T2 + vmovdqu HashKey_4(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM5, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM5, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_4_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM6, \T2 + vpxor \XMM6, \T2, \T2 + vmovdqu HashKey_3(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM6, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM6, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_3_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM7, \T2 + vpxor \XMM7, \T2, \T2 + vmovdqu HashKey_2(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM7, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM7, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_2_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vpshufd $0b01001110, \XMM8, \T2 + vpxor \XMM8, \T2, \T2 + vmovdqu HashKey(arg2), \T5 + vpclmulqdq $0x11, \T5, \XMM8, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM8, \T4 + vpxor \T4, \T7, \T7 + + vmovdqu HashKey_k(arg2), \T3 + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + vpxor \T6, \XMM1, \XMM1 + vpxor \T7, \XMM1, \T2 + + + + + vpslldq $8, \T2, \T4 + vpsrldq $8, \T2, \T2 + + vpxor \T4, \T7, \T7 + vpxor \T2, \T6, \T6 # holds the result of + # the accumulated carry-less multiplications + + ####################################################################### + #first phase of the reduction + vpslld $31, \T7, \T2 # packed right shifting << 31 + vpslld $30, \T7, \T3 # packed right shifting shift << 30 + vpslld $25, \T7, \T4 # packed right shifting shift << 25 + + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpsrldq $4, \T2, \T1 # shift-R T1 1 DW + + vpslldq $12, \T2, \T2 # shift-L T2 3 DWs + vpxor \T2, \T7, \T7 # first phase of the reduction complete + ####################################################################### + + + #second phase of the reduction + vpsrld $1, \T7, \T2 # packed left shifting >> 1 + vpsrld $2, \T7, \T3 # packed left shifting >> 2 + vpsrld $7, \T7, \T4 # packed left shifting >> 7 + vpxor \T3, \T2, \T2 # xor the shifted versions + vpxor \T4, \T2, \T2 + + vpxor \T1, \T2, \T2 + vpxor \T2, \T7, \T7 + vpxor \T7, \T6, \T6 # the result is in T6 + +.endm + +############################################################# +#void aesni_gcm_precomp_avx_gen2 +# (gcm_data *my_ctx_data, +# gcm_context_data *data, +# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ +# u8 *iv, /* Pre-counter block j0: 4 byte salt +# (from Security Association) concatenated with 8 byte +# Initialisation Vector (from IPSec ESP Payload) +# concatenated with 0x00000001. 16-byte aligned pointer. */ +# const u8 *aad, /* Additional Authentication Data (AAD)*/ +# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ +############################################################# +SYM_FUNC_START(aesni_gcm_init_avx_gen2) + FUNC_SAVE + INIT GHASH_MUL_AVX, PRECOMPUTE_AVX + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_init_avx_gen2) + +############################################################################### +#void aesni_gcm_enc_update_avx_gen2( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ +# const u8 *in, /* Plaintext input */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2) + FUNC_SAVE + mov keysize, %eax + cmp $32, %eax + je key_256_enc_update + cmp $16, %eax + je key_128_enc_update + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 + FUNC_RESTORE + RET +key_128_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 + FUNC_RESTORE + RET +key_256_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) + +############################################################################### +#void aesni_gcm_dec_update_avx_gen2( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ +# const u8 *in, /* Ciphertext input */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_dec_update + cmp $16, %eax + je key_128_dec_update + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 + FUNC_RESTORE + RET +key_128_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 + FUNC_RESTORE + RET +key_256_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) + +############################################################################### +#void aesni_gcm_finalize_avx_gen2( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *auth_tag, /* Authenticated Tag output. */ +# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. +# Valid values are 16 (most likely), 12 or 8. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_finalize_avx_gen2) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_finalize + cmp $16, %eax + je key_128_finalize + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 + FUNC_RESTORE + RET +key_128_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 + FUNC_RESTORE + RET +key_256_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) + +############################################################################### +# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) +# Input: A and B (128-bits each, bit-reflected) +# Output: C = A*B*x mod poly, (i.e. >>1 ) +# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input +# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. +############################################################################### +.macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 + + vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 + vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 + vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 + vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 + vpxor \T3, \GH, \GH + + + vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs + vpslldq $8 , \GH, \GH # shift-L GH 2 DWs + + vpxor \T3, \T1, \T1 + vpxor \T2, \GH, \GH + + ####################################################################### + #first phase of the reduction + vmovdqa POLY2(%rip), \T3 + + vpclmulqdq $0x01, \GH, \T3, \T2 + vpslldq $8, \T2, \T2 # shift-L T2 2 DWs + + vpxor \T2, \GH, \GH # first phase of the reduction complete + ####################################################################### + #second phase of the reduction + vpclmulqdq $0x00, \GH, \T3, \T2 + vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) + + vpclmulqdq $0x10, \GH, \T3, \GH + vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) + + vpxor \T2, \GH, \GH # second phase of the reduction complete + ####################################################################### + vpxor \T1, \GH, \GH # the result is in GH + + +.endm + +.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 + + # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + vmovdqa \HK, \T5 + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly + vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly + vmovdqu \T5, HashKey_3(arg2) + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly + vmovdqu \T5, HashKey_4(arg2) + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly + vmovdqu \T5, HashKey_5(arg2) + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly + vmovdqu \T5, HashKey_6(arg2) + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly + vmovdqu \T5, HashKey_7(arg2) + + GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly + vmovdqu \T5, HashKey_8(arg2) + +.endm + +## if a = number of total plaintext bytes +## b = floor(a/16) +## num_initial_blocks = b mod 4# +## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext +## r10, r11, r12, rax are clobbered +## arg1, arg2, arg3, arg4 are used as pointers only, not modified + +.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER + i = (8-\num_initial_blocks) + setreg + vmovdqu AadHash(arg2), reg_i + + # start AES for num_initial_blocks blocks + vmovdqu CurCount(arg2), \CTR + + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, reg_i + vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap + i = (i+1) + setreg +.endr + + vmovdqa (arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vpxor \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + j = 1 + setreg +.rep \REP + vmovdqa 16*j(arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vaesenc \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + j = (j+1) + setreg +.endr + + + vmovdqa 16*j(arg1), \T_key + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vaesenclast \T_key, reg_i, reg_i + i = (i+1) + setreg +.endr + + i = (9-\num_initial_blocks) + setreg +.rep \num_initial_blocks + vmovdqu (arg4, %r11), \T1 + vpxor \T1, reg_i, reg_i + vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for + # num_initial_blocks blocks + add $16, %r11 +.if \ENC_DEC == DEC + vmovdqa \T1, reg_i +.endif + vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations + i = (i+1) + setreg +.endr + + + i = (8-\num_initial_blocks) + j = (9-\num_initial_blocks) + setreg + +.rep \num_initial_blocks + vpxor reg_i, reg_j, reg_j + GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks + i = (i+1) + j = (j+1) + setreg +.endr + # XMM8 has the combined result here + + vmovdqa \XMM8, TMP1(%rsp) + vmovdqa \XMM8, \T3 + + cmp $128, %r13 + jl _initial_blocks_done\@ # no need for precomputed constants + +############################################################################### +# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM1 + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM2 + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM3 + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM4 + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM5 + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM6 + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM7 + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + + vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 + vmovdqa \CTR, \XMM8 + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + + vmovdqa (arg1), \T_key + vpxor \T_key, \XMM1, \XMM1 + vpxor \T_key, \XMM2, \XMM2 + vpxor \T_key, \XMM3, \XMM3 + vpxor \T_key, \XMM4, \XMM4 + vpxor \T_key, \XMM5, \XMM5 + vpxor \T_key, \XMM6, \XMM6 + vpxor \T_key, \XMM7, \XMM7 + vpxor \T_key, \XMM8, \XMM8 + + i = 1 + setreg +.rep \REP # do REP rounds + vmovdqa 16*i(arg1), \T_key + vaesenc \T_key, \XMM1, \XMM1 + vaesenc \T_key, \XMM2, \XMM2 + vaesenc \T_key, \XMM3, \XMM3 + vaesenc \T_key, \XMM4, \XMM4 + vaesenc \T_key, \XMM5, \XMM5 + vaesenc \T_key, \XMM6, \XMM6 + vaesenc \T_key, \XMM7, \XMM7 + vaesenc \T_key, \XMM8, \XMM8 + i = (i+1) + setreg +.endr + + + vmovdqa 16*i(arg1), \T_key + vaesenclast \T_key, \XMM1, \XMM1 + vaesenclast \T_key, \XMM2, \XMM2 + vaesenclast \T_key, \XMM3, \XMM3 + vaesenclast \T_key, \XMM4, \XMM4 + vaesenclast \T_key, \XMM5, \XMM5 + vaesenclast \T_key, \XMM6, \XMM6 + vaesenclast \T_key, \XMM7, \XMM7 + vaesenclast \T_key, \XMM8, \XMM8 + + vmovdqu (arg4, %r11), \T1 + vpxor \T1, \XMM1, \XMM1 + vmovdqu \XMM1, (arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM1 + .endif + + vmovdqu 16*1(arg4, %r11), \T1 + vpxor \T1, \XMM2, \XMM2 + vmovdqu \XMM2, 16*1(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM2 + .endif + + vmovdqu 16*2(arg4, %r11), \T1 + vpxor \T1, \XMM3, \XMM3 + vmovdqu \XMM3, 16*2(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM3 + .endif + + vmovdqu 16*3(arg4, %r11), \T1 + vpxor \T1, \XMM4, \XMM4 + vmovdqu \XMM4, 16*3(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM4 + .endif + + vmovdqu 16*4(arg4, %r11), \T1 + vpxor \T1, \XMM5, \XMM5 + vmovdqu \XMM5, 16*4(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM5 + .endif + + vmovdqu 16*5(arg4, %r11), \T1 + vpxor \T1, \XMM6, \XMM6 + vmovdqu \XMM6, 16*5(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM6 + .endif + + vmovdqu 16*6(arg4, %r11), \T1 + vpxor \T1, \XMM7, \XMM7 + vmovdqu \XMM7, 16*6(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM7 + .endif + + vmovdqu 16*7(arg4, %r11), \T1 + vpxor \T1, \XMM8, \XMM8 + vmovdqu \XMM8, 16*7(arg3 , %r11) + .if \ENC_DEC == DEC + vmovdqa \T1, \XMM8 + .endif + + add $128, %r11 + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with + # the corresponding ciphertext + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + +############################################################################### + +_initial_blocks_done\@: + + +.endm + + + +# encrypt 8 blocks at a time +# ghash the 8 previously encrypted ciphertext blocks +# arg1, arg2, arg3, arg4 are used as pointers only, not modified +# r11 is the data offset value +.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC + + vmovdqa \XMM1, \T2 + vmovdqa \XMM2, TMP2(%rsp) + vmovdqa \XMM3, TMP3(%rsp) + vmovdqa \XMM4, TMP4(%rsp) + vmovdqa \XMM5, TMP5(%rsp) + vmovdqa \XMM6, TMP6(%rsp) + vmovdqa \XMM7, TMP7(%rsp) + vmovdqa \XMM8, TMP8(%rsp) + +.if \loop_idx == in_order + vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT + vpaddd ONE(%rip), \XMM1, \XMM2 + vpaddd ONE(%rip), \XMM2, \XMM3 + vpaddd ONE(%rip), \XMM3, \XMM4 + vpaddd ONE(%rip), \XMM4, \XMM5 + vpaddd ONE(%rip), \XMM5, \XMM6 + vpaddd ONE(%rip), \XMM6, \XMM7 + vpaddd ONE(%rip), \XMM7, \XMM8 + vmovdqa \XMM8, \CTR + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap +.else + vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT + vpaddd ONEf(%rip), \XMM1, \XMM2 + vpaddd ONEf(%rip), \XMM2, \XMM3 + vpaddd ONEf(%rip), \XMM3, \XMM4 + vpaddd ONEf(%rip), \XMM4, \XMM5 + vpaddd ONEf(%rip), \XMM5, \XMM6 + vpaddd ONEf(%rip), \XMM6, \XMM7 + vpaddd ONEf(%rip), \XMM7, \XMM8 + vmovdqa \XMM8, \CTR +.endif + + + ####################################################################### + + vmovdqu (arg1), \T1 + vpxor \T1, \XMM1, \XMM1 + vpxor \T1, \XMM2, \XMM2 + vpxor \T1, \XMM3, \XMM3 + vpxor \T1, \XMM4, \XMM4 + vpxor \T1, \XMM5, \XMM5 + vpxor \T1, \XMM6, \XMM6 + vpxor \T1, \XMM7, \XMM7 + vpxor \T1, \XMM8, \XMM8 + + ####################################################################### + + + + + + vmovdqu 16*1(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqu 16*2(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + + ####################################################################### + + vmovdqu HashKey_8(arg2), \T5 + vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 + vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 + vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 + vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 + vpxor \T5, \T6, \T6 + + vmovdqu 16*3(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP2(%rsp), \T1 + vmovdqu HashKey_7(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*4(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + ####################################################################### + + vmovdqa TMP3(%rsp), \T1 + vmovdqu HashKey_6(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*5(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP4(%rsp), \T1 + vmovdqu HashKey_5(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*6(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + + vmovdqa TMP5(%rsp), \T1 + vmovdqu HashKey_4(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*7(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP6(%rsp), \T1 + vmovdqu HashKey_3(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vmovdqu 16*8(arg1), \T1 + vaesenc \T1, \XMM1, \XMM1 + vaesenc \T1, \XMM2, \XMM2 + vaesenc \T1, \XMM3, \XMM3 + vaesenc \T1, \XMM4, \XMM4 + vaesenc \T1, \XMM5, \XMM5 + vaesenc \T1, \XMM6, \XMM6 + vaesenc \T1, \XMM7, \XMM7 + vaesenc \T1, \XMM8, \XMM8 + + vmovdqa TMP7(%rsp), \T1 + vmovdqu HashKey_2(arg2), \T5 + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T4 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + + ####################################################################### + + vmovdqu 16*9(arg1), \T5 + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqa TMP8(%rsp), \T1 + vmovdqu HashKey(arg2), \T5 + + vpclmulqdq $0x00, \T5, \T1, \T3 + vpxor \T3, \T7, \T7 + + vpclmulqdq $0x01, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x10, \T5, \T1, \T3 + vpxor \T3, \T6, \T6 + + vpclmulqdq $0x11, \T5, \T1, \T3 + vpxor \T3, \T4, \T1 + + + vmovdqu 16*10(arg1), \T5 + + i = 11 + setreg +.rep (\REP-9) + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqu 16*i(arg1), \T5 + i = i + 1 + setreg +.endr + + i = 0 + j = 1 + setreg +.rep 8 + vpxor 16*i(arg4, %r11), \T5, \T2 + .if \ENC_DEC == ENC + vaesenclast \T2, reg_j, reg_j + .else + vaesenclast \T2, reg_j, \T3 + vmovdqu 16*i(arg4, %r11), reg_j + vmovdqu \T3, 16*i(arg3, %r11) + .endif + i = (i+1) + j = (j+1) + setreg +.endr + ####################################################################### + + + vpslldq $8, \T6, \T3 # shift-L T3 2 DWs + vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs + vpxor \T3, \T7, \T7 + vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 + + + + ####################################################################### + #first phase of the reduction + vmovdqa POLY2(%rip), \T3 + + vpclmulqdq $0x01, \T7, \T3, \T2 + vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs + + vpxor \T2, \T7, \T7 # first phase of the reduction complete + ####################################################################### + .if \ENC_DEC == ENC + vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer + .endif + + ####################################################################### + #second phase of the reduction + vpclmulqdq $0x00, \T7, \T3, \T2 + vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) + + vpclmulqdq $0x10, \T7, \T3, \T4 + vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) + + vpxor \T2, \T4, \T4 # second phase of the reduction complete + ####################################################################### + vpxor \T4, \T1, \T1 # the result is in T1 + + vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap + vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap + + + vpxor \T1, \XMM1, \XMM1 + + + +.endm + + +# GHASH the last 4 ciphertext blocks. +.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 + + ## Karatsuba Method + + vmovdqu HashKey_8(arg2), \T5 + + vpshufd $0b01001110, \XMM1, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM1, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM1, \T6 + vpclmulqdq $0x00, \T5, \XMM1, \T7 + + vpclmulqdq $0x00, \T3, \T2, \XMM1 + + ###################### + + vmovdqu HashKey_7(arg2), \T5 + vpshufd $0b01001110, \XMM2, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM2, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM2, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM2, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey_6(arg2), \T5 + vpshufd $0b01001110, \XMM3, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM3, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM3, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM3, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey_5(arg2), \T5 + vpshufd $0b01001110, \XMM4, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM4, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM4, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM4, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey_4(arg2), \T5 + vpshufd $0b01001110, \XMM5, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM5, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM5, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM5, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey_3(arg2), \T5 + vpshufd $0b01001110, \XMM6, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM6, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM6, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM6, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey_2(arg2), \T5 + vpshufd $0b01001110, \XMM7, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM7, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM7, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM7, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + + ###################### + + vmovdqu HashKey(arg2), \T5 + vpshufd $0b01001110, \XMM8, \T2 + vpshufd $0b01001110, \T5, \T3 + vpxor \XMM8, \T2, \T2 + vpxor \T5, \T3, \T3 + + vpclmulqdq $0x11, \T5, \XMM8, \T4 + vpxor \T4, \T6, \T6 + + vpclmulqdq $0x00, \T5, \XMM8, \T4 + vpxor \T4, \T7, \T7 + + vpclmulqdq $0x00, \T3, \T2, \T2 + + vpxor \T2, \XMM1, \XMM1 + vpxor \T6, \XMM1, \XMM1 + vpxor \T7, \XMM1, \T2 + + + + + vpslldq $8, \T2, \T4 + vpsrldq $8, \T2, \T2 + + vpxor \T4, \T7, \T7 + vpxor \T2, \T6, \T6 # holds the result of the + # accumulated carry-less multiplications + + ####################################################################### + #first phase of the reduction + vmovdqa POLY2(%rip), \T3 + + vpclmulqdq $0x01, \T7, \T3, \T2 + vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs + + vpxor \T2, \T7, \T7 # first phase of the reduction complete + ####################################################################### + + + #second phase of the reduction + vpclmulqdq $0x00, \T7, \T3, \T2 + vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) + + vpclmulqdq $0x10, \T7, \T3, \T4 + vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) + + vpxor \T2, \T4, \T4 # second phase of the reduction complete + ####################################################################### + vpxor \T4, \T6, \T6 # the result is in T6 +.endm + + + +############################################################# +#void aesni_gcm_init_avx_gen4 +# (gcm_data *my_ctx_data, +# gcm_context_data *data, +# u8 *iv, /* Pre-counter block j0: 4 byte salt +# (from Security Association) concatenated with 8 byte +# Initialisation Vector (from IPSec ESP Payload) +# concatenated with 0x00000001. 16-byte aligned pointer. */ +# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ +# const u8 *aad, /* Additional Authentication Data (AAD)*/ +# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ +############################################################# +SYM_FUNC_START(aesni_gcm_init_avx_gen4) + FUNC_SAVE + INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_init_avx_gen4) + +############################################################################### +#void aesni_gcm_enc_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ +# const u8 *in, /* Plaintext input */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_enc_update4 + cmp $16, %eax + je key_128_enc_update4 + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 + FUNC_RESTORE + RET +key_128_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 + FUNC_RESTORE + RET +key_256_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) + +############################################################################### +#void aesni_gcm_dec_update_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ +# const u8 *in, /* Ciphertext input */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_dec_update4 + cmp $16, %eax + je key_128_dec_update4 + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 + FUNC_RESTORE + RET +key_128_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 + FUNC_RESTORE + RET +key_256_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) + +############################################################################### +#void aesni_gcm_finalize_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *auth_tag, /* Authenticated Tag output. */ +# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. +# Valid values are 16 (most likely), 12 or 8. */ +############################################################################### +SYM_FUNC_START(aesni_gcm_finalize_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_finalize4 + cmp $16, %eax + je key_128_finalize4 + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 + FUNC_RESTORE + RET +key_128_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 + FUNC_RESTORE + RET +key_256_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 + FUNC_RESTORE + RET +SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c new file mode 100644 index 000000000..a5b0cb3ef --- /dev/null +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -0,0 +1,1319 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for Intel AES-NI instructions. This file contains glue + * code, the real AES implementation is in intel-aes_asm.S. + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define AESNI_ALIGN 16 +#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN))) +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1)) +#define RFC4106_HASH_SUBKEY_SIZE 16 +#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) +#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) +#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) + +/* This data is stored at the end of the crypto_tfm struct. + * It's a type of per "session" data storage location. + * This needs to be 16 byte aligned. + */ +struct aesni_rfc4106_gcm_ctx { + u8 hash_subkey[16] AESNI_ALIGN_ATTR; + struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; + u8 nonce[4]; +}; + +struct generic_gcmaes_ctx { + u8 hash_subkey[16] AESNI_ALIGN_ATTR; + struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; +}; + +struct aesni_xts_ctx { + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; +}; + +#define GCM_BLOCK_LEN 16 + +struct gcm_context_data { + /* init, update and finalize context data */ + u8 aad_hash[GCM_BLOCK_LEN]; + u64 aad_length; + u64 in_length; + u8 partial_block_enc_key[GCM_BLOCK_LEN]; + u8 orig_IV[GCM_BLOCK_LEN]; + u8 current_counter[GCM_BLOCK_LEN]; + u64 partial_block_len; + u64 unused; + u8 hash_keys[GCM_BLOCK_LEN * 16]; +}; + +asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); +asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +#define AVX_GEN2_OPTSIZE 640 +#define AVX_GEN4_OPTSIZE 4096 + +asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +#ifdef CONFIG_X86_64 + +asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc); + +/* Scatter / Gather routines, with args similar to above */ +asmlinkage void aesni_gcm_init(void *ctx, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, const u8 *aad, + unsigned long aad_len); +asmlinkage void aesni_gcm_enc_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); + +asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); + + +asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, + const void *keys, u8 *out, unsigned int num_bytes, + unsigned int byte_ctr); + +asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, + const void *keys, u8 *out, unsigned int num_bytes, + unsigned int byte_ctr); + +asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, + const void *keys, u8 *out, unsigned int num_bytes, + unsigned int byte_ctr); + +/* + * asmlinkage void aesni_gcm_init_avx_gen2() + * gcm_data *my_ctx_data, context data + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + */ +asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, + const u8 *aad, + unsigned long aad_len); + +asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); + +/* + * asmlinkage void aesni_gcm_init_avx_gen4() + * gcm_data *my_ctx_data, context data + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + */ +asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, + const u8 *aad, + unsigned long aad_len); + +asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2); + +static inline struct +aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) +{ + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return PTR_ALIGN(crypto_aead_ctx(tfm), align); +} + +static inline struct +generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm) +{ + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return PTR_ALIGN(crypto_aead_ctx(tfm), align); +} +#endif + +static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) +{ + unsigned long addr = (unsigned long)raw_ctx; + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (struct crypto_aes_ctx *)ALIGN(addr, align); +} + +static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, + const u8 *in_key, unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); + int err; + + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + if (!crypto_simd_usable()) + err = aes_expandkey(ctx, in_key, key_len); + else { + kernel_fpu_begin(); + err = aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); + } + + return err; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); +} + +static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (!crypto_simd_usable()) { + aes_encrypt(ctx, dst, src); + } else { + kernel_fpu_begin(); + aesni_enc(ctx, dst, src); + kernel_fpu_end(); + } +} + +static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (!crypto_simd_usable()) { + aes_decrypt(ctx, dst, src); + } else { + kernel_fpu_begin(); + aesni_dec(ctx, dst, src); + kernel_fpu_end(); + } +} + +static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) +{ + return aes_set_key_common(crypto_skcipher_tfm(tfm), + crypto_skcipher_ctx(tfm), key, len); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); + aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); + aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); + aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); + aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int cts_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = cbc_encrypt(&subreq); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int cts_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = cbc_decrypt(&subreq); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + +#ifdef CONFIG_X86_64 +static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv) +{ + /* + * based on key length, override with the by8 version + * of ctr mode encryption/decryption for improved performance + * aes_set_key_common() ensures that key length is one of + * {128,192,256} + */ + if (ctx->key_length == AES_KEYSIZE_128) + aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); + else if (ctx->key_length == AES_KEYSIZE_192) + aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); + else + aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + u8 keystream[AES_BLOCK_SIZE]; + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + kernel_fpu_begin(); + if (nbytes & AES_BLOCK_MASK) + static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, + walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, + walk.iv); + nbytes &= ~AES_BLOCK_MASK; + + if (walk.nbytes == walk.total && nbytes > 0) { + aesni_enc(ctx, keystream, walk.iv); + crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes, + walk.src.virt.addr + walk.nbytes - nbytes, + keystream, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + nbytes = 0; + } + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + return err; +} + +static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv, + unsigned int byte_ctr) +{ + if (ctx->key_length == AES_KEYSIZE_128) + aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len, + byte_ctr); + else if (ctx->key_length == AES_KEYSIZE_192) + aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len, + byte_ctr); + else + aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len, + byte_ctr); +} + +static int xctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + u8 keystream[AES_BLOCK_SIZE]; + struct skcipher_walk walk; + unsigned int nbytes; + unsigned int byte_ctr = 0; + int err; + __le32 block[AES_BLOCK_SIZE / sizeof(__le32)]; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + kernel_fpu_begin(); + if (nbytes & AES_BLOCK_MASK) + aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr, + walk.src.virt.addr, nbytes & AES_BLOCK_MASK, + walk.iv, byte_ctr); + nbytes &= ~AES_BLOCK_MASK; + byte_ctr += walk.nbytes - nbytes; + + if (walk.nbytes == walk.total && nbytes > 0) { + memcpy(block, walk.iv, AES_BLOCK_SIZE); + block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE); + aesni_enc(ctx, keystream, (u8 *)block); + crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - + nbytes, walk.src.virt.addr + walk.nbytes + - nbytes, keystream, nbytes); + byte_ctr += nbytes; + nbytes = 0; + } + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + return err; +} + +static int +rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +{ + struct crypto_aes_ctx ctx; + int ret; + + ret = aes_expandkey(&ctx, key, key_len); + if (ret) + return ret; + + /* Clear the data in the hash sub key container to zero.*/ + /* We want to cipher all zeros to create the hash sub key. */ + memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); + + aes_encrypt(&ctx, hash_subkey, hash_subkey); + + memzero_explicit(&ctx, sizeof(ctx)); + return 0; +} + +static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, + unsigned int key_len) +{ + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); + + if (key_len < 4) + return -EINVAL; + + /*Account for 4 byte nonce at the end.*/ + key_len -= 4; + + memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); + + return aes_set_key_common(crypto_aead_tfm(aead), + &ctx->aes_key_expanded, key, key_len) ?: + rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); +} + +/* This is the Integrity Check Value (aka the authentication tag) length and can + * be 8, 12 or 16 bytes long. */ +static int common_rfc4106_set_authsize(struct crypto_aead *aead, + unsigned int authsize) +{ + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12: + case 13: + case 14: + case 15: + case 16: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, + unsigned int assoclen, u8 *hash_subkey, + u8 *iv, void *aes_ctx, u8 *auth_tag, + unsigned long auth_tag_len) +{ + u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); + struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); + unsigned long left = req->cryptlen; + struct scatter_walk assoc_sg_walk; + struct skcipher_walk walk; + bool do_avx, do_avx2; + u8 *assocmem = NULL; + u8 *assoc; + int err; + + if (!enc) + left -= auth_tag_len; + + do_avx = (left >= AVX_GEN2_OPTSIZE); + do_avx2 = (left >= AVX_GEN4_OPTSIZE); + + /* Linearize assoc, if not already linear */ + if (req->src->length >= assoclen && req->src->length) { + scatterwalk_start(&assoc_sg_walk, req->src); + assoc = scatterwalk_map(&assoc_sg_walk); + } else { + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + + /* assoc can be any length, so must be on heap */ + assocmem = kmalloc(assoclen, flags); + if (unlikely(!assocmem)) + return -ENOMEM; + assoc = assocmem; + + scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); + } + + kernel_fpu_begin(); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) + aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc, + assoclen); + else if (static_branch_likely(&gcm_use_avx) && do_avx) + aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc, + assoclen); + else + aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); + kernel_fpu_end(); + + if (!assocmem) + scatterwalk_unmap(assoc); + else + kfree(assocmem); + + err = enc ? skcipher_walk_aead_encrypt(&walk, req, false) + : skcipher_walk_aead_decrypt(&walk, req, false); + + while (walk.nbytes > 0) { + kernel_fpu_begin(); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) { + if (enc) + aesni_gcm_enc_update_avx_gen4(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + else + aesni_gcm_dec_update_avx_gen4(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + } else if (static_branch_likely(&gcm_use_avx) && do_avx) { + if (enc) + aesni_gcm_enc_update_avx_gen2(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + else + aesni_gcm_dec_update_avx_gen2(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + } else if (enc) { + aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + } else { + aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, 0); + } + + if (err) + return err; + + kernel_fpu_begin(); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) + aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag, + auth_tag_len); + else if (static_branch_likely(&gcm_use_avx) && do_avx) + aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag, + auth_tag_len); + else + aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len); + kernel_fpu_end(); + + return 0; +} + +static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, + u8 *hash_subkey, u8 *iv, void *aes_ctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 auth_tag[16]; + int err; + + err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx, + auth_tag, auth_tag_len); + if (err) + return err; + + scatterwalk_map_and_copy(auth_tag, req->dst, + req->assoclen + req->cryptlen, + auth_tag_len, 1); + return 0; +} + +static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, + u8 *hash_subkey, u8 *iv, void *aes_ctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 auth_tag_msg[16]; + u8 auth_tag[16]; + int err; + + err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx, + auth_tag, auth_tag_len); + if (err) + return err; + + /* Copy out original auth_tag */ + scatterwalk_map_and_copy(auth_tag_msg, req->src, + req->assoclen + req->cryptlen - auth_tag_len, + auth_tag_len, 0); + + /* Compare generated tag with passed in tag. */ + if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) { + memzero_explicit(auth_tag, sizeof(auth_tag)); + return -EBADMSG; + } + return 0; +} + +static int helper_rfc4106_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); + unsigned int i; + __be32 counter = cpu_to_be32(1); + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length equal */ + /* to 16 or 20 bytes */ + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, + aes_ctx); +} + +static int helper_rfc4106_decrypt(struct aead_request *req) +{ + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); + unsigned int i; + + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 16 or 20 bytes */ + + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, + aes_ctx); +} +#endif + +static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, + key, keylen); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, + key + keylen, keylen); +} + +static int xts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; + + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; + } else { + tail = 0; + } + + kernel_fpu_begin(); + + /* calculate first value of T */ + aesni_enc(aes_ctx(ctx->raw_tweak_ctx), walk.iv, walk.iv); + + while (walk.nbytes > 0) { + int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (walk.nbytes > 0) + kernel_fpu_begin(); + } + + if (unlikely(tail > 0 && !err)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, 0); + } + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return xts_crypt(req, true); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return xts_crypt(req, false); +} + +static struct crypto_alg aesni_cipher_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-aesni", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aesni_encrypt, + .cia_decrypt = aesni_decrypt + } + } +}; + +static struct skcipher_alg aesni_skciphers[] = { + { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cts(cbc(aes))", + .cra_driver_name = "__cts-cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = cts_cbc_encrypt, + .decrypt = cts_cbc_decrypt, +#ifdef CONFIG_X86_64 + }, { + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, +#endif + }, { + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-aesni", + .cra_priority = 401, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = XTS_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + } +}; + +static +struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; + +#ifdef CONFIG_X86_64 +/* + * XCTR does not have a non-AVX implementation, so it must be enabled + * conditionally. + */ +static struct skcipher_alg aesni_xctr = { + .base = { + .cra_name = "__xctr(aes)", + .cra_driver_name = "__xctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = xctr_crypt, + .decrypt = xctr_crypt, +}; + +static struct simd_skcipher_alg *aesni_simd_xctr; +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 +static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, + unsigned int key_len) +{ + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); + + return aes_set_key_common(crypto_aead_tfm(aead), + &ctx->aes_key_expanded, key, key_len) ?: + rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); +} + +static int generic_gcmaes_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); + __be32 counter = cpu_to_be32(1); + + memcpy(iv, req->iv, 12); + *((__be32 *)(iv+12)) = counter; + + return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv, + aes_ctx); +} + +static int generic_gcmaes_decrypt(struct aead_request *req) +{ + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); + + memcpy(iv, req->iv, 12); + *((__be32 *)(iv+12)) = counter; + + return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv, + aes_ctx); +} + +static struct aead_alg aesni_aeads[] = { { + .setkey = common_rfc4106_set_key, + .setauthsize = common_rfc4106_set_authsize, + .encrypt = helper_rfc4106_encrypt, + .decrypt = helper_rfc4106_decrypt, + .ivsize = GCM_RFC4106_IV_SIZE, + .maxauthsize = 16, + .base = { + .cra_name = "__rfc4106(gcm(aes))", + .cra_driver_name = "__rfc4106-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, +}, { + .setkey = generic_gcmaes_set_key, + .setauthsize = generic_gcmaes_set_authsize, + .encrypt = generic_gcmaes_encrypt, + .decrypt = generic_gcmaes_decrypt, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = 16, + .base = { + .cra_name = "__gcm(aes)", + .cra_driver_name = "__generic-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, +} }; +#else +static struct aead_alg aesni_aeads[0]; +#endif + +static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_AES, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + +static int __init aesni_init(void) +{ + int err; + + if (!x86_match_cpu(aesni_cpu_id)) + return -ENODEV; +#ifdef CONFIG_X86_64 + if (boot_cpu_has(X86_FEATURE_AVX2)) { + pr_info("AVX2 version of gcm_enc/dec engaged.\n"); + static_branch_enable(&gcm_use_avx); + static_branch_enable(&gcm_use_avx2); + } else + if (boot_cpu_has(X86_FEATURE_AVX)) { + pr_info("AVX version of gcm_enc/dec engaged.\n"); + static_branch_enable(&gcm_use_avx); + } else { + pr_info("SSE version of gcm_enc/dec engaged.\n"); + } + if (boot_cpu_has(X86_FEATURE_AVX)) { + /* optimize performance of ctr mode encryption transform */ + static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm); + pr_info("AES CTR mode by8 optimization enabled\n"); + } +#endif /* CONFIG_X86_64 */ + + err = crypto_register_alg(&aesni_cipher_alg); + if (err) + return err; + + err = simd_register_skciphers_compat(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); + if (err) + goto unregister_cipher; + + err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); + if (err) + goto unregister_skciphers; + +#ifdef CONFIG_X86_64 + if (boot_cpu_has(X86_FEATURE_AVX)) + err = simd_register_skciphers_compat(&aesni_xctr, 1, + &aesni_simd_xctr); + if (err) + goto unregister_aeads; +#endif /* CONFIG_X86_64 */ + + return 0; + +#ifdef CONFIG_X86_64 +unregister_aeads: + simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); +#endif /* CONFIG_X86_64 */ + +unregister_skciphers: + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); +unregister_cipher: + crypto_unregister_alg(&aesni_cipher_alg); + return err; +} + +static void __exit aesni_exit(void) +{ + simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); + crypto_unregister_alg(&aesni_cipher_alg); +#ifdef CONFIG_X86_64 + if (boot_cpu_has(X86_FEATURE_AVX)) + simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); +#endif /* CONFIG_X86_64 */ +} + +late_initcall(aesni_init); +module_exit(aesni_exit); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S new file mode 100644 index 000000000..03ae4cd1d --- /dev/null +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -0,0 +1,1304 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ARIA Cipher 16-way parallel algorithm (AVX) + * + * Copyright (c) 2022 Taehee Yoo + * + */ + +#include +#include +#include + +/* struct aria_ctx: */ +#define enc_key 0 +#define dec_key 272 +#define rounds 544 + +/* register macros */ +#define CTX %rdi + + +#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ + ( (((a0) & 1) << 0) | \ + (((a1) & 1) << 1) | \ + (((a2) & 1) << 2) | \ + (((a3) & 1) << 3) | \ + (((a4) & 1) << 4) | \ + (((a5) & 1) << 5) | \ + (((a6) & 1) << 6) | \ + (((a7) & 1) << 7) ) + +#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ + ( ((l7) << (0 * 8)) | \ + ((l6) << (1 * 8)) | \ + ((l5) << (2 * 8)) | \ + ((l4) << (3 * 8)) | \ + ((l3) << (4 * 8)) | \ + ((l2) << (5 * 8)) | \ + ((l1) << (6 * 8)) | \ + ((l0) << (7 * 8)) ) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define debyteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(c0, d0, a0, b0, d2, d3); \ + transpose_4x4(c1, d1, a1, b1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(c2, d2, a2, b2, b0, b1); \ + transpose_4x4(c3, d3, a3, b3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + rio) \ + vmovdqu (0 * 16)(rio), x0; \ + vmovdqu (1 * 16)(rio), x1; \ + vmovdqu (2 * 16)(rio), x2; \ + vmovdqu (3 * 16)(rio), x3; \ + vmovdqu (4 * 16)(rio), x4; \ + vmovdqu (5 * 16)(rio), x5; \ + vmovdqu (6 * 16)(rio), x6; \ + vmovdqu (7 * 16)(rio), x7; \ + vmovdqu (8 * 16)(rio), y0; \ + vmovdqu (9 * 16)(rio), y1; \ + vmovdqu (10 * 16)(rio), y2; \ + vmovdqu (11 * 16)(rio), y3; \ + vmovdqu (12 * 16)(rio), y4; \ + vmovdqu (13 * 16)(rio), y5; \ + vmovdqu (14 * 16)(rio), y6; \ + vmovdqu (15 * 16)(rio), y7; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_ab, mem_cd) \ + byteslice_16x16b(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); \ + vmovdqu y0, 0 * 16(mem_cd); \ + vmovdqu y1, 1 * 16(mem_cd); \ + vmovdqu y2, 2 * 16(mem_cd); \ + vmovdqu y3, 3 * 16(mem_cd); \ + vmovdqu y4, 4 * 16(mem_cd); \ + vmovdqu y5, 5 * 16(mem_cd); \ + vmovdqu y6, 6 * 16(mem_cd); \ + vmovdqu y7, 7 * 16(mem_cd); + +#define write_output(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem) \ + vmovdqu x0, 0 * 16(mem); \ + vmovdqu x1, 1 * 16(mem); \ + vmovdqu x2, 2 * 16(mem); \ + vmovdqu x3, 3 * 16(mem); \ + vmovdqu x4, 4 * 16(mem); \ + vmovdqu x5, 5 * 16(mem); \ + vmovdqu x6, 6 * 16(mem); \ + vmovdqu x7, 7 * 16(mem); \ + vmovdqu y0, 8 * 16(mem); \ + vmovdqu y1, 9 * 16(mem); \ + vmovdqu y2, 10 * 16(mem); \ + vmovdqu y3, 11 * 16(mem); \ + vmovdqu y4, 12 * 16(mem); \ + vmovdqu y5, 13 * 16(mem); \ + vmovdqu y6, 14 * 16(mem); \ + vmovdqu y7, 15 * 16(mem); \ + +#define aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu x0, ((idx + 0) * 16)(mem_tmp); \ + vmovdqu x1, ((idx + 1) * 16)(mem_tmp); \ + vmovdqu x2, ((idx + 2) * 16)(mem_tmp); \ + vmovdqu x3, ((idx + 3) * 16)(mem_tmp); \ + vmovdqu x4, ((idx + 4) * 16)(mem_tmp); \ + vmovdqu x5, ((idx + 5) * 16)(mem_tmp); \ + vmovdqu x6, ((idx + 6) * 16)(mem_tmp); \ + vmovdqu x7, ((idx + 7) * 16)(mem_tmp); + +#define aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu ((idx + 0) * 16)(mem_tmp), x0; \ + vmovdqu ((idx + 1) * 16)(mem_tmp), x1; \ + vmovdqu ((idx + 2) * 16)(mem_tmp), x2; \ + vmovdqu ((idx + 3) * 16)(mem_tmp), x3; \ + vmovdqu ((idx + 4) * 16)(mem_tmp), x4; \ + vmovdqu ((idx + 5) * 16)(mem_tmp), x5; \ + vmovdqu ((idx + 6) * 16)(mem_tmp), x6; \ + vmovdqu ((idx + 7) * 16)(mem_tmp), x7; + +#define aria_ark_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, rk, idx, round) \ + /* AddRoundKey */ \ + vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ + vpxor t0, x0, x0; \ + vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ + vpxor t0, x1, x1; \ + vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ + vpxor t0, x2, x2; \ + vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ + vpxor t0, x3, x3; \ + vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ + vpxor t0, x4, x4; \ + vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ + vpxor t0, x5, x5; \ + vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ + vpxor t0, x6, x6; \ + vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ + vpxor t0, x7, x7; + +#define aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ + vpbroadcastq .Ltf_id_bitmatrix, t2; \ + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ + vgf2p8affineinvqb $0, t2, x2, x2; \ + vgf2p8affineinvqb $0, t2, x6, x6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ + vgf2p8affineinvqb $0, t2, x3, x3; \ + vgf2p8affineinvqb $0, t2, x7, x7 + +#define aria_sbox_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpxor t7, t7, t7; \ + vmovdqa .Linv_shift_row, t0; \ + vmovdqa .Lshift_row, t1; \ + vpbroadcastd .L0f0f0f0f, t6; \ + vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \ + vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \ + vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \ + vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \ + \ + vaesenclast t7, x0, x0; \ + vaesenclast t7, x4, x4; \ + vaesenclast t7, x1, x1; \ + vaesenclast t7, x5, x5; \ + vaesdeclast t7, x2, x2; \ + vaesdeclast t7, x6, x6; \ + \ + /* AES inverse shift rows */ \ + vpshufb t0, x0, x0; \ + vpshufb t0, x4, x4; \ + vpshufb t0, x1, x1; \ + vpshufb t0, x5, x5; \ + vpshufb t1, x3, x3; \ + vpshufb t1, x7, x7; \ + vpshufb t1, x2, x2; \ + vpshufb t1, x6, x6; \ + \ + /* affine transformation for S2 */ \ + filter_8bit(x1, t2, t3, t6, t0); \ + /* affine transformation for S2 */ \ + filter_8bit(x5, t2, t3, t6, t0); \ + \ + /* affine transformation for X2 */ \ + filter_8bit(x3, t4, t5, t6, t0); \ + /* affine transformation for X2 */ \ + filter_8bit(x7, t4, t5, t6, t0); \ + vaesdeclast t7, x3, x3; \ + vaesdeclast t7, x7, x7; + +#define aria_diff_m(x0, x1, x2, x3, \ + t0, t1, t2, t3) \ + /* T = rotr32(X, 8); */ \ + /* X ^= T */ \ + vpxor x0, x3, t0; \ + vpxor x1, x0, t1; \ + vpxor x2, x1, t2; \ + vpxor x3, x2, t3; \ + /* X = T ^ rotr(X, 16); */ \ + vpxor t2, x0, x0; \ + vpxor x1, t3, t3; \ + vpxor t0, x2, x2; \ + vpxor t1, x3, x1; \ + vmovdqu t3, x3; + +#define aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7) \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; \ + \ + /* t2 ^= t3; */ \ + vpxor y4, y0, y0; \ + vpxor y5, y1, y1; \ + vpxor y6, y2, y2; \ + vpxor y7, y3, y3; \ + \ + /* t0 ^= t1; */ \ + vpxor x4, x0, x0; \ + vpxor x5, x1, x1; \ + vpxor x6, x2, x2; \ + vpxor x7, x3, x3; \ + \ + /* t3 ^= t1; */ \ + vpxor x4, y4, y4; \ + vpxor x5, y5, y5; \ + vpxor x6, y6, y6; \ + vpxor x7, y7, y7; \ + \ + /* t2 ^= t0; */ \ + vpxor x0, y0, y0; \ + vpxor x1, y1, y1; \ + vpxor x2, y2, y2; \ + vpxor x3, y3, y3; \ + \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; + +#define aria_fe(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); + +#define aria_fe_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.Lshift_row: + .byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03 + .byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + +/* AES inverse affine and S2 combined: + * 1 1 0 0 0 0 0 1 x0 0 + * 0 1 0 0 1 0 0 0 x1 0 + * 1 1 0 0 1 1 1 1 x2 0 + * 0 1 1 0 1 0 0 1 x3 1 + * 0 1 0 0 1 1 0 0 * x4 + 0 + * 0 1 0 1 1 0 0 0 x5 0 + * 0 0 0 0 0 1 0 1 x6 0 + * 1 1 1 0 0 1 1 1 x7 1 + */ +.Ltf_lo__inv_aff__and__s2: + .octa 0x92172DA81A9FA520B2370D883ABF8500 +.Ltf_hi__inv_aff__and__s2: + .octa 0x2B15FFC1AF917B45E6D8320C625CB688 + +/* X2 and AES forward affine combined: + * 1 0 1 1 0 0 0 1 x0 0 + * 0 1 1 1 1 0 1 1 x1 0 + * 0 0 0 1 1 0 1 0 x2 1 + * 0 1 0 0 0 1 0 0 x3 0 + * 0 0 1 1 1 0 1 1 * x4 + 0 + * 0 1 0 0 1 0 0 0 x5 0 + * 1 1 0 1 0 0 1 1 x6 0 + * 0 1 0 0 1 0 1 0 x7 0 + */ +.Ltf_lo__x2__and__fwd_aff: + .octa 0xEFAE0544FCBD1657B8F95213ABEA4100 +.Ltf_hi__x2__and__fwd_aff: + .octa 0x3F893781E95FE1576CDA64D2BA0CB204 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 +/* AES affine: */ +#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) +.Ltf_aff_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), + BV8(1, 1, 0, 0, 0, 1, 1, 1), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 0, 0, 1), + BV8(1, 1, 1, 1, 1, 0, 0, 0), + BV8(0, 1, 1, 1, 1, 1, 0, 0), + BV8(0, 0, 1, 1, 1, 1, 1, 0), + BV8(0, 0, 0, 1, 1, 1, 1, 1)) + +/* AES inverse affine: */ +#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) +.Ltf_inv_bitmatrix: + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 0), + BV8(0, 1, 0, 0, 1, 0, 0, 1), + BV8(1, 0, 1, 0, 0, 1, 0, 0), + BV8(0, 1, 0, 1, 0, 0, 1, 0), + BV8(0, 0, 1, 0, 1, 0, 0, 1), + BV8(1, 0, 0, 1, 0, 1, 0, 0), + BV8(0, 1, 0, 0, 1, 0, 1, 0)) + +/* S2: */ +#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) +.Ltf_s2_bitmatrix: + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), + BV8(0, 0, 1, 1, 1, 1, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 1), + BV8(1, 1, 0, 0, 0, 0, 1, 1), + BV8(0, 1, 0, 0, 0, 0, 1, 1), + BV8(1, 1, 0, 0, 1, 1, 1, 0), + BV8(0, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 1, 1, 0)) + +/* X2: */ +#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) +.Ltf_x2_bitmatrix: + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 1, 1, 0), + BV8(0, 0, 0, 0, 1, 0, 1, 0), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 0), + BV8(0, 1, 1, 0, 1, 0, 1, 1), + BV8(1, 0, 1, 1, 1, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 1)) + +/* Identity matrix: */ +.Ltf_id_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), + BV8(0, 1, 0, 0, 0, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 0, 0, 0), + BV8(0, 0, 0, 1, 0, 0, 0, 0), + BV8(0, 0, 0, 0, 1, 0, 0, 0), + BV8(0, 0, 0, 0, 0, 1, 0, 0), + BV8(0, 0, 0, 0, 0, 0, 1, 0), + BV8(0, 0, 0, 0, 0, 0, 0, 1)) + +/* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 +.align 4 +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %xmm0..%xmm15: 16 byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 16(%rax), %r8; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r8); + aria_fo(%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 0); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 1); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 2); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 3); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 4); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 5); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 6); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 7); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 8); + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 9); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 10); + cmpl $12, rounds(CTX); + jne .Laria_192; + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11, 12); + jmp .Laria_end; +.Laria_192: + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 12); + cmpl $14, rounds(CTX); + jne .Laria_256; + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13, 14); + jmp .Laria_end; +.Laria_256: + aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13); + aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 14); + aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 15, 16); +.Laria_end: + debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4, + %xmm9, %xmm13, %xmm0, %xmm5, + %xmm10, %xmm14, %xmm3, %xmm6, + %xmm11, %xmm15, %xmm2, %xmm7, + (%rax), (%r8)); + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_crypt_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq enc_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_encrypt_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq dec_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_decrypt_16way) + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + + FRAME_BEGIN + /* load IV and byteswap */ + vmovdqu (%r8), %xmm8; + + vmovdqa .Lbswap128_mask (%rip), %xmm1; + vpshufb %xmm1, %xmm8, %xmm3; /* be => le */ + + vpcmpeqd %xmm0, %xmm0, %xmm0; + vpsrldq $8, %xmm0, %xmm0; /* low: -1, high: 0 */ + + /* construct IVs */ + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm9; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm10; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm11; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm12; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm13; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm14; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm15; + vmovdqu %xmm8, (0 * 16)(%rcx); + vmovdqu %xmm9, (1 * 16)(%rcx); + vmovdqu %xmm10, (2 * 16)(%rcx); + vmovdqu %xmm11, (3 * 16)(%rcx); + vmovdqu %xmm12, (4 * 16)(%rcx); + vmovdqu %xmm13, (5 * 16)(%rcx); + vmovdqu %xmm14, (6 * 16)(%rcx); + vmovdqu %xmm15, (7 * 16)(%rcx); + + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm8; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm9; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm10; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm11; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm12; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm13; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm14; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm15; + inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */ + vpshufb %xmm1, %xmm3, %xmm4; + vmovdqu %xmm4, (%r8); + + vmovdqu (0 * 16)(%rcx), %xmm0; + vmovdqu (1 * 16)(%rcx), %xmm1; + vmovdqu (2 * 16)(%rcx), %xmm2; + vmovdqu (3 * 16)(%rcx), %xmm3; + vmovdqu (4 * 16)(%rcx), %xmm4; + vmovdqu (5 * 16)(%rcx), %xmm5; + vmovdqu (6 * 16)(%rcx), %xmm6; + vmovdqu (7 * 16)(%rcx), %xmm7; + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx_ctr_gen_keystream_16way; + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq enc_key(CTX), %r9; + + call __aria_aesni_avx_crypt_16way; + + vpxor (0 * 16)(%r11), %xmm1, %xmm1; + vpxor (1 * 16)(%r11), %xmm0, %xmm0; + vpxor (2 * 16)(%r11), %xmm3, %xmm3; + vpxor (3 * 16)(%r11), %xmm2, %xmm2; + vpxor (4 * 16)(%r11), %xmm4, %xmm4; + vpxor (5 * 16)(%r11), %xmm5, %xmm5; + vpxor (6 * 16)(%r11), %xmm6, %xmm6; + vpxor (7 * 16)(%r11), %xmm7, %xmm7; + vpxor (8 * 16)(%r11), %xmm8, %xmm8; + vpxor (9 * 16)(%r11), %xmm9, %xmm9; + vpxor (10 * 16)(%r11), %xmm10, %xmm10; + vpxor (11 * 16)(%r11), %xmm11, %xmm11; + vpxor (12 * 16)(%r11), %xmm12, %xmm12; + vpxor (13 * 16)(%r11), %xmm13, %xmm13; + vpxor (14 * 16)(%r11), %xmm14, %xmm14; + vpxor (15 * 16)(%r11), %xmm15, %xmm15; + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) + +SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %xmm0..%xmm15: 16 byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 16(%rax), %r8; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r8); + aria_fo_gfni(%xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 0); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 1); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 2); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 3); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 4); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 5); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 6); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 7); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 8); + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 9); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 10); + cmpl $12, rounds(CTX); + jne .Laria_gfni_192; + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11, 12); + jmp .Laria_gfni_end; +.Laria_gfni_192: + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 11); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 12); + cmpl $14, rounds(CTX); + jne .Laria_gfni_256; + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13, 14); + jmp .Laria_gfni_end; +.Laria_gfni_256: + aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 13); + aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10, + %xmm12, %xmm13, %xmm14, %xmm15, + %xmm0, %xmm1, %xmm2, %xmm3, + %xmm4, %xmm5, %xmm6, %xmm7, + %rax, %r9, 14); + aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, + %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, + %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %r9, 15, 16); +.Laria_gfni_end: + debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4, + %xmm9, %xmm13, %xmm0, %xmm5, + %xmm10, %xmm14, %xmm3, %xmm6, + %xmm11, %xmm15, %xmm2, %xmm7, + (%rax), (%r8)); + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq enc_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_gfni_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq dec_key(CTX), %r9; + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx); + + call __aria_aesni_avx_gfni_crypt_16way; + + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way) + +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx_ctr_gen_keystream_16way + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq enc_key(CTX), %r9; + + call __aria_aesni_avx_gfni_crypt_16way; + + vpxor (0 * 16)(%r11), %xmm1, %xmm1; + vpxor (1 * 16)(%r11), %xmm0, %xmm0; + vpxor (2 * 16)(%r11), %xmm3, %xmm3; + vpxor (3 * 16)(%r11), %xmm2, %xmm2; + vpxor (4 * 16)(%r11), %xmm4, %xmm4; + vpxor (5 * 16)(%r11), %xmm5, %xmm5; + vpxor (6 * 16)(%r11), %xmm6, %xmm6; + vpxor (7 * 16)(%r11), %xmm7, %xmm7; + vpxor (8 * 16)(%r11), %xmm8, %xmm8; + vpxor (9 * 16)(%r11), %xmm9, %xmm9; + vpxor (10 * 16)(%r11), %xmm10, %xmm10; + vpxor (11 * 16)(%r11), %xmm11, %xmm11; + vpxor (12 * 16)(%r11), %xmm12, %xmm12; + vpxor (13 * 16)(%r11), %xmm13, %xmm13; + vpxor (14 * 16)(%r11), %xmm14, %xmm14; + vpxor (15 * 16)(%r11), %xmm15, %xmm15; + write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h new file mode 100644 index 000000000..01e9a01dc --- /dev/null +++ b/arch/x86/crypto/aria-avx.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_ARIA_AVX_H +#define ASM_X86_ARIA_AVX_H + +#include + +#define ARIA_AESNI_PARALLEL_BLOCKS 16 +#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16) + +struct aria_avx_ops { + void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src, + u8 *keystream, u8 *iv); +}; +#endif diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c new file mode 100644 index 000000000..c561ea4fe --- /dev/null +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Glue Code for the AVX/AES-NI/GFNI assembler implementation of the ARIA Cipher + * + * Copyright (c) 2022 Taehee Yoo + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ecb_cbc_helpers.h" +#include "aria-avx.h" + +asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); +asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); + +static struct aria_avx_ops aria_ops; + +static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); + ECB_BLOCK(1, aria_encrypt); + ECB_WALK_END(); +} + +static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); + ECB_BLOCK(1, aria_decrypt); + ECB_WALK_END(); +} + +static int aria_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_encrypt(req, ctx->enc_key[0]); +} + +static int aria_avx_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_decrypt(req, ctx->dec_key[0]); +} + +static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return aria_set_key(&tfm->base, key, keylen); +} + +static int aria_avx_ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { + u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; + + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream, + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_BLOCK_SIZE) { + u8 keystream[ARIA_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE); + dst += ARIA_BLOCK_SIZE; + src += ARIA_BLOCK_SIZE; + nbytes -= ARIA_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[ARIA_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg aria_algs[] = { + { + .base.cra_name = "__ecb(aria)", + .base.cra_driver_name = "__ecb-aria-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = ARIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .setkey = aria_avx_set_key, + .encrypt = aria_avx_ecb_encrypt, + .decrypt = aria_avx_ecb_decrypt, + }, { + .base.cra_name = "__ctr(aria)", + .base.cra_driver_name = "__ctr-aria-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .ivsize = ARIA_BLOCK_SIZE, + .chunksize = ARIA_BLOCK_SIZE, + .walksize = 16 * ARIA_BLOCK_SIZE, + .setkey = aria_avx_set_key, + .encrypt = aria_avx_ctr_encrypt, + .decrypt = aria_avx_ctr_encrypt, + } +}; + +static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)]; + +static int __init aria_avx_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + if (boot_cpu_has(X86_FEATURE_GFNI)) { + aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; + } else { + aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way; + } + + return simd_register_skciphers_compat(aria_algs, + ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +static void __exit aria_avx_exit(void) +{ + simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +module_init(aria_avx_init); +module_exit(aria_avx_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Taehee Yoo "); +MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX/AES-NI/GFNI optimized"); +MODULE_ALIAS_CRYPTO("aria"); +MODULE_ALIAS_CRYPTO("aria-aesni-avx"); diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S new file mode 100644 index 000000000..b50b35ff1 --- /dev/null +++ b/arch/x86/crypto/blake2s-core.S @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * Copyright (C) 2017-2019 Samuel Neves . All Rights Reserved. + */ + +#include + +.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 +.align 32 +IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 + .octa 0x5BE0CD191F83D9AB9B05688C510E527F +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 +ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 +.section .rodata.cst16.ROR328, "aM", @progbits, 16 +.align 16 +ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 +.align 64 +SIGMA: +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 +#ifdef CONFIG_AS_AVX512 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.align 64 +SIGMA2: +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +#endif /* CONFIG_AS_AVX512 */ + +.text +SYM_FUNC_START(blake2s_compress_ssse3) + testq %rdx,%rdx + je .Lendofloop + movdqu (%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqa ROT16(%rip),%xmm12 + movdqa ROR328(%rip),%xmm13 + movdqu 0x20(%rdi),%xmm14 + movq %rcx,%xmm15 + leaq SIGMA+0xa0(%rip),%r8 + jmp .Lbeginofloop + .align 32 +.Lbeginofloop: + movdqa %xmm0,%xmm10 + movdqa %xmm1,%xmm11 + paddq %xmm15,%xmm14 + movdqa IV(%rip),%xmm2 + movdqa %xmm14,%xmm3 + pxor IV+0x10(%rip),%xmm3 + leaq SIGMA(%rip),%rcx +.Lroundloop: + movzbl (%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0x1(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x2(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x3(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + punpckldq %xmm5,%xmm4 + punpckldq %xmm7,%xmm6 + punpcklqdq %xmm6,%xmm4 + paddd %xmm4,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0x4(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x5(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x6(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0x7(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + punpckldq %xmm6,%xmm5 + punpckldq %xmm4,%xmm7 + punpcklqdq %xmm7,%xmm5 + paddd %xmm5,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x93,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + movzbl 0x8(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x9(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xa(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xb(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + punpckldq %xmm7,%xmm6 + punpckldq %xmm5,%xmm4 + punpcklqdq %xmm4,%xmm6 + paddd %xmm6,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0xc(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xd(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xe(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0xf(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + punpckldq %xmm4,%xmm7 + punpckldq %xmm6,%xmm5 + punpcklqdq %xmm5,%xmm7 + paddd %xmm7,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x93,%xmm2,%xmm2 + addq $0x10,%rcx + cmpq %r8,%rcx + jnz .Lroundloop + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm1 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm1 + addq $0x40,%rsi + decq %rdx + jnz .Lbeginofloop + movdqu %xmm0,(%rdi) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) +.Lendofloop: + RET +SYM_FUNC_END(blake2s_compress_ssse3) + +#ifdef CONFIG_AS_AVX512 +SYM_FUNC_START(blake2s_compress_avx512) + vmovdqu (%rdi),%xmm0 + vmovdqu 0x10(%rdi),%xmm1 + vmovdqu 0x20(%rdi),%xmm4 + vmovq %rcx,%xmm5 + vmovdqa IV(%rip),%xmm14 + vmovdqa IV+16(%rip),%xmm15 + jmp .Lblake2s_compress_avx512_mainloop +.align 32 +.Lblake2s_compress_avx512_mainloop: + vmovdqa %xmm0,%xmm10 + vmovdqa %xmm1,%xmm11 + vpaddq %xmm5,%xmm4,%xmm4 + vmovdqa %xmm14,%xmm2 + vpxor %xmm15,%xmm4,%xmm3 + vmovdqu (%rsi),%ymm6 + vmovdqu 0x20(%rsi),%ymm7 + addq $0x40,%rsi + leaq SIGMA2(%rip),%rax + movb $0xa,%cl +.Lblake2s_compress_avx512_roundloop: + addq $0x40,%rax + vmovdqa -0x40(%rax),%ymm8 + vmovdqa -0x20(%rax),%ymm9 + vpermi2d %ymm7,%ymm6,%ymm8 + vpermi2d %ymm7,%ymm6,%ymm9 + vmovdqa %ymm8,%ymm6 + vmovdqa %ymm9,%ymm7 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm8,%xmm8 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x93,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x39,%xmm2,%xmm2 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm9,%xmm9 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x39,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x93,%xmm2,%xmm2 + decb %cl + jne .Lblake2s_compress_avx512_roundloop + vpxor %xmm10,%xmm0,%xmm0 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm2,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + decq %rdx + jne .Lblake2s_compress_avx512_mainloop + vmovdqu %xmm0,(%rdi) + vmovdqu %xmm1,0x10(%rdi) + vmovdqu %xmm4,0x20(%rdi) + vzeroupper + RET +SYM_FUNC_END(blake2s_compress_avx512) +#endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c new file mode 100644 index 000000000..aaba21230 --- /dev/null +++ b/arch/x86/crypto/blake2s-glue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + block += blocks * BLAKE2S_BLOCK_SIZE; + } while (nblocks); +} +EXPORT_SYMBOL(blake2s_compress); + +static int __init blake2s_mod_init(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); + + return 0; +} + +module_init(blake2s_mod_init); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S new file mode 100644 index 000000000..4a43e072d --- /dev/null +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -0,0 +1,369 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Blowfish Cipher Algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + */ + +#include +#include + +.file "blowfish-x86_64-asm.S" +.text + +/* structure of crypto context */ +#define p 0 +#define s0 ((16 + 2) * 4) +#define s1 ((16 + 2 + (1 * 256)) * 4) +#define s2 ((16 + 2 + (2 * 256)) * 4) +#define s3 ((16 + 2 + (3 * 256)) * 4) + +/* register macros */ +#define CTX %r12 +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rdi +#define RT1 %rsi +#define RT2 %r8 +#define RT3 %r9 + +#define RT0d %edi +#define RT1d %esi +#define RT2d %r8d +#define RT3d %r9d + +#define RKEY %r10 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F() \ + rorq $16, RX0; \ + movzbl RX0bh, RT0d; \ + movzbl RX0bl, RT1d; \ + rolq $16, RX0; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT1,4), RT0d; \ + movzbl RX0bh, RT1d; \ + movzbl RX0bl, RT2d; \ + rolq $32, RX0; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT2,4), RT0d; \ + xorq RT0, RX0; + +#define add_roundkey_enc(n) \ + xorq p+4*(n)(CTX), RX0; + +#define round_enc(n) \ + add_roundkey_enc(n); \ + \ + F(); \ + F(); + +#define add_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RT0; \ + rorq $32, RT0; \ + xorq RT0, RX0; + +#define round_dec(n) \ + add_roundkey_dec(n); \ + \ + F(); \ + F(); \ + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +#define xor_block() \ + bswapq RX0; \ + xorq RX0, (RIO); + +SYM_FUNC_START(__blowfish_enc_blk) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + movq %r12, %r11; + + movq %rdi, CTX; + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + round_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + add_roundkey_enc(16); + + movq %r11, %r12; + + movq %r10, RIO; + test %cl, %cl; + jnz .L__enc_xor; + + write_block(); + RET; +.L__enc_xor: + xor_block(); + RET; +SYM_FUNC_END(__blowfish_enc_blk) + +SYM_TYPED_FUNC_START(blowfish_dec_blk) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + movq %r12, %r11; + + movq %rdi, CTX; + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + round_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + add_roundkey_dec(1); + + movq %r10, RIO; + write_block(); + + movq %r11, %r12; + + RET; +SYM_FUNC_END(blowfish_dec_blk) + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ + +/* F() for 4-way. Slower when used alone/1-way, but faster when used + * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330). + */ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define read_block4() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; \ + \ + movq 8(RIO), RX1; \ + rorq $32, RX1; \ + bswapq RX1; \ + \ + movq 16(RIO), RX2; \ + rorq $32, RX2; \ + bswapq RX2; \ + \ + movq 24(RIO), RX3; \ + rorq $32, RX3; \ + bswapq RX3; + +#define write_block4() \ + bswapq RX0; \ + movq RX0, (RIO); \ + \ + bswapq RX1; \ + movq RX1, 8(RIO); \ + \ + bswapq RX2; \ + movq RX2, 16(RIO); \ + \ + bswapq RX3; \ + movq RX3, 24(RIO); + +#define xor_block4() \ + bswapq RX0; \ + xorq RX0, (RIO); \ + \ + bswapq RX1; \ + xorq RX1, 8(RIO); \ + \ + bswapq RX2; \ + xorq RX2, 16(RIO); \ + \ + bswapq RX3; \ + xorq RX3, 24(RIO); + +SYM_FUNC_START(__blowfish_enc_blk_4way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + pushq %r12; + pushq %rbx; + pushq %rcx; + + movq %rdi, CTX + movq %rsi, %r11; + movq %rdx, RIO; + + preload_roundkey_enc(0); + + read_block4(); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + popq %r12; + movq %r11, RIO; + + test %r12b, %r12b; + jnz .L__enc_xor4; + + write_block4(); + + popq %rbx; + popq %r12; + RET; + +.L__enc_xor4: + xor_block4(); + + popq %rbx; + popq %r12; + RET; +SYM_FUNC_END(__blowfish_enc_blk_4way) + +SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + pushq %r12; + pushq %rbx; + + movq %rdi, CTX; + movq %rsi, %r11 + movq %rdx, RIO; + + preload_roundkey_dec(17); + read_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + movq %r11, RIO; + write_block4(); + + popq %rbx; + popq %r12; + + RET; +SYM_FUNC_END(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c new file mode 100644 index 000000000..019c64c13 --- /dev/null +++ b/arch/x86/crypto/blowfish_glue.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for assembler optimized version of Blowfish + * + * Copyright (c) 2011 Jussi Kivilinna + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + */ + +#include +#include +#include +#include +#include +#include +#include + +/* regular block cipher functions */ +asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); + +/* 4-way parallel cipher functions */ +asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, false); +} + +static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return blowfish_setkey(&tfm->base, key, keylen); +} + +static int ecb_crypt(struct skcipher_request *req, + void (*fn)(struct bf_ctx *, u8 *, const u8 *), + void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) +{ + unsigned int bsize = BF_BLOCK_SIZE; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + fn_4way(ctx, wdst, wsrc); + + wsrc += bsize * 4; + wdst += bsize * 4; + nbytes -= bsize * 4; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); +} + +static unsigned int __cbc_encrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) +{ + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) +{ + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[4 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + nbytes -= bsize * 4 - bsize; + src -= 4 - 1; + dst -= 4 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + ivs[2] = src[2]; + + blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + dst[3] ^= ivs[2]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 4); + } + + /* Handle leftovers */ + for (;;) { + blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct crypto_alg bf_cipher_alg = { + .cra_name = "blowfish", + .cra_driver_name = "blowfish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = BF_MIN_KEY_SIZE, + .cia_max_keysize = BF_MAX_KEY_SIZE, + .cia_setkey = blowfish_setkey, + .cia_encrypt = blowfish_encrypt, + .cia_decrypt = blowfish_decrypt, + } + } +}; + +static struct skcipher_alg bf_skcipher_algs[] = { + { + .base.cra_name = "ecb(blowfish)", + .base.cra_driver_name = "ecb-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(blowfish)", + .base.cra_driver_name = "cbc-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, blowfish-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init blowfish_init(void) +{ + int err; + + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "blowfish-x86_64: performance on this CPU " + "would be suboptimal: disabling " + "blowfish-x86_64.\n"); + return -ENODEV; + } + + err = crypto_register_alg(&bf_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); + if (err) + crypto_unregister_alg(&bf_cipher_alg); + + return err; +} + +static void __exit blowfish_fini(void) +{ + crypto_unregister_alg(&bf_cipher_alg); + crypto_unregister_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); +} + +module_init(blowfish_init); +module_exit(blowfish_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); +MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-asm"); diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S new file mode 100644 index 000000000..2e1658ddb --- /dev/null +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -0,0 +1,991 @@ +/* + * x86_64/AVX/AES-NI assembler implementation of Camellia + * + * Copyright © 2012-2013 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +/* + * Version licensed under 2-clause BSD License is available at: + * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz + */ + +#include +#include + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct camellia_ctx: */ +#define key_table 0 +#define key_length CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi + +/********************************************************************** + 16-way camellia + **********************************************************************/ +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* + * IN: + * x0..x7: byte-sliced AB state + * mem_cd: register pointer storing CD state + * key: index for key material + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ + t7, mem_cd, key) \ + /* \ + * S-function with AES subbytes \ + */ \ + vmovdqa .Linv_shift_row, t4; \ + vbroadcastss .L0f0f0f0f, t7; \ + vmovdqa .Lpre_tf_lo_s1, t0; \ + vmovdqa .Lpre_tf_hi_s1, t1; \ + \ + /* AES inverse shift rows */ \ + vpshufb t4, x0, x0; \ + vpshufb t4, x7, x7; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x3, x3; \ + vpshufb t4, x6, x6; \ + \ + /* prefilter sboxes 1, 2 and 3 */ \ + vmovdqa .Lpre_tf_lo_s4, t2; \ + vmovdqa .Lpre_tf_hi_s4, t3; \ + filter_8bit(x0, t0, t1, t7, t6); \ + filter_8bit(x7, t0, t1, t7, t6); \ + filter_8bit(x1, t0, t1, t7, t6); \ + filter_8bit(x4, t0, t1, t7, t6); \ + filter_8bit(x2, t0, t1, t7, t6); \ + filter_8bit(x5, t0, t1, t7, t6); \ + \ + /* prefilter sbox 4 */ \ + vpxor t4, t4, t4; \ + filter_8bit(x3, t2, t3, t7, t6); \ + filter_8bit(x6, t2, t3, t7, t6); \ + \ + /* AES subbytes + AES shift rows */ \ + vmovdqa .Lpost_tf_lo_s1, t0; \ + vmovdqa .Lpost_tf_hi_s1, t1; \ + vaesenclast t4, x0, x0; \ + vaesenclast t4, x7, x7; \ + vaesenclast t4, x1, x1; \ + vaesenclast t4, x4, x4; \ + vaesenclast t4, x2, x2; \ + vaesenclast t4, x5, x5; \ + vaesenclast t4, x3, x3; \ + vaesenclast t4, x6, x6; \ + \ + /* postfilter sboxes 1 and 4 */ \ + vmovdqa .Lpost_tf_lo_s3, t2; \ + vmovdqa .Lpost_tf_hi_s3, t3; \ + filter_8bit(x0, t0, t1, t7, t6); \ + filter_8bit(x7, t0, t1, t7, t6); \ + filter_8bit(x3, t0, t1, t7, t6); \ + filter_8bit(x6, t0, t1, t7, t6); \ + \ + /* postfilter sbox 3 */ \ + vmovdqa .Lpost_tf_lo_s2, t4; \ + vmovdqa .Lpost_tf_hi_s2, t5; \ + filter_8bit(x2, t2, t3, t7, t6); \ + filter_8bit(x5, t2, t3, t7, t6); \ + \ + vpxor t6, t6, t6; \ + vmovq key, t0; \ + \ + /* postfilter sbox 2 */ \ + filter_8bit(x1, t4, t5, t7, t2); \ + filter_8bit(x4, t4, t5, t7, t2); \ + \ + vpsrldq $5, t0, t5; \ + vpsrldq $1, t0, t1; \ + vpsrldq $2, t0, t2; \ + vpsrldq $3, t0, t3; \ + vpsrldq $4, t0, t4; \ + vpshufb t6, t0, t0; \ + vpshufb t6, t1, t1; \ + vpshufb t6, t2, t2; \ + vpshufb t6, t3, t3; \ + vpshufb t6, t4, t4; \ + vpsrldq $2, t5, t7; \ + vpshufb t6, t7, t7; \ + \ + /* \ + * P-function \ + */ \ + vpxor x5, x0, x0; \ + vpxor x6, x1, x1; \ + vpxor x7, x2, x2; \ + vpxor x4, x3, x3; \ + \ + vpxor x2, x4, x4; \ + vpxor x3, x5, x5; \ + vpxor x0, x6, x6; \ + vpxor x1, x7, x7; \ + \ + vpxor x7, x0, x0; \ + vpxor x4, x1, x1; \ + vpxor x5, x2, x2; \ + vpxor x6, x3, x3; \ + \ + vpxor x3, x4, x4; \ + vpxor x0, x5, x5; \ + vpxor x1, x6, x6; \ + vpxor x2, x7, x7; /* note: high and low parts swapped */ \ + \ + /* \ + * Add key material and result to CD (x becomes new CD) \ + */ \ + \ + vpxor t3, x4, x4; \ + vpxor 0 * 16(mem_cd), x4, x4; \ + \ + vpxor t2, x5, x5; \ + vpxor 1 * 16(mem_cd), x5, x5; \ + \ + vpsrldq $1, t5, t3; \ + vpshufb t6, t5, t5; \ + vpshufb t6, t3, t6; \ + \ + vpxor t1, x6, x6; \ + vpxor 2 * 16(mem_cd), x6, x6; \ + \ + vpxor t0, x7, x7; \ + vpxor 3 * 16(mem_cd), x7, x7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 16(mem_cd), x0, x0; \ + \ + vpxor t6, x1, x1; \ + vpxor 5 * 16(mem_cd), x1, x1; \ + \ + vpxor t5, x2, x2; \ + vpxor 6 * 16(mem_cd), x2, x2; \ + \ + vpxor t4, x3, x3; \ + vpxor 7 * 16(mem_cd), x3, x3; + +/* + * Size optimization... with inlined roundsm16, binary would be over 5 times + * larger and would only be 0.5% faster (on sandy-bridge). + */ +.align 8 +SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, + %rcx, (%r9)); + RET; +SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + +.align 8 +SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, + %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, + %rax, (%r9)); + RET; +SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + +/* + * IN/OUT: + * x0..x7: byte-sliced AB state preloaded + * mem_ab: byte-sliced AB state in memory + * mem_cb: byte-sliced CD state in memory + */ +#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ + leaq (key_table + (i) * 8)(CTX), %r9; \ + call roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \ + \ + vmovdqu x4, 0 * 16(mem_cd); \ + vmovdqu x5, 1 * 16(mem_cd); \ + vmovdqu x6, 2 * 16(mem_cd); \ + vmovdqu x7, 3 * 16(mem_cd); \ + vmovdqu x0, 4 * 16(mem_cd); \ + vmovdqu x1, 5 * 16(mem_cd); \ + vmovdqu x2, 6 * 16(mem_cd); \ + vmovdqu x3, 7 * 16(mem_cd); \ + \ + leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \ + call roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \ + \ + store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); + +#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ + +#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ + /* Store new AB state */ \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); + +#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); + +#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ + two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); + +/* + * IN: + * v0..3: byte-sliced 32-bit integers + * OUT: + * v0..3: (IN <<< 1) + */ +#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \ + vpcmpgtb v0, zero, t0; \ + vpaddb v0, v0, v0; \ + vpabsb t0, t0; \ + \ + vpcmpgtb v1, zero, t1; \ + vpaddb v1, v1, v1; \ + vpabsb t1, t1; \ + \ + vpcmpgtb v2, zero, t2; \ + vpaddb v2, v2, v2; \ + vpabsb t2, t2; \ + \ + vpor t0, v1, v1; \ + \ + vpcmpgtb v3, zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * r: byte-sliced AB state in memory + * l: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpxor tt0, tt0, tt0; \ + vmovd kll, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vmovdqu l4, 4 * 16(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 16(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 16(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 16(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vmovd krr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 16(r), t0, t0; \ + vpor 5 * 16(r), t1, t1; \ + vpor 6 * 16(r), t2, t2; \ + vpor 7 * 16(r), t3, t3; \ + \ + vpxor 0 * 16(r), t0, t0; \ + vpxor 1 * 16(r), t1, t1; \ + vpxor 2 * 16(r), t2, t2; \ + vpxor 3 * 16(r), t3, t3; \ + vmovdqu t0, 0 * 16(r); \ + vmovdqu t1, 1 * 16(r); \ + vmovdqu t2, 2 * 16(r); \ + vmovdqu t3, 3 * 16(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vmovd krl, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 16(r), t0, t0; \ + vpand 1 * 16(r), t1, t1; \ + vpand 2 * 16(r), t2, t2; \ + vpand 3 * 16(r), t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 16(r), t0, t0; \ + vpxor 5 * 16(r), t1, t1; \ + vpxor 6 * 16(r), t2, t2; \ + vpxor 7 * 16(r), t3, t3; \ + vmovdqu t0, 4 * 16(r); \ + vmovdqu t1, 5 * 16(r); \ + vmovdqu t2, 6 * 16(r); \ + vmovdqu t3, 7 * 16(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vmovd klr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 16(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 16(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 16(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 16(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, \ + b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vmovq key, x0; \ + vpshufb .Lpack_bswap, x0, x0; \ + \ + vpxor 0 * 16(rio), x0, y7; \ + vpxor 1 * 16(rio), x0, y6; \ + vpxor 2 * 16(rio), x0, y5; \ + vpxor 3 * 16(rio), x0, y4; \ + vpxor 4 * 16(rio), x0, y3; \ + vpxor 5 * 16(rio), x0, y2; \ + vpxor 6 * 16(rio), x0, y1; \ + vpxor 7 * 16(rio), x0, y0; \ + vpxor 8 * 16(rio), x0, x7; \ + vpxor 9 * 16(rio), x0, x6; \ + vpxor 10 * 16(rio), x0, x5; \ + vpxor 11 * 16(rio), x0, x4; \ + vpxor 12 * 16(rio), x0, x3; \ + vpxor 13 * 16(rio), x0, x2; \ + vpxor 14 * 16(rio), x0, x1; \ + vpxor 15 * 16(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); \ + vmovdqu y0, 0 * 16(mem_cd); \ + vmovdqu y1, 1 * 16(mem_cd); \ + vmovdqu y2, 2 * 16(mem_cd); \ + vmovdqu y3, 3 * 16(mem_cd); \ + vmovdqu y4, 4 * 16(mem_cd); \ + vmovdqu y5, 5 * 16(mem_cd); \ + vmovdqu y6, 6 * 16(mem_cd); \ + vmovdqu y7, 7 * 16(mem_cd); + +/* de-byteslice, apply post-whitening and store blocks */ +#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, y3, \ + y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vmovq key, x0; \ + vpshufb .Lpack_bswap, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 16(rio); \ + vmovdqu x1, 1 * 16(rio); \ + vmovdqu x2, 2 * 16(rio); \ + vmovdqu x3, 3 * 16(rio); \ + vmovdqu x4, 4 * 16(rio); \ + vmovdqu x5, 5 * 16(rio); \ + vmovdqu x6, 6 * 16(rio); \ + vmovdqu x7, 7 * 16(rio); \ + vmovdqu y0, 8 * 16(rio); \ + vmovdqu y1, 9 * 16(rio); \ + vmovdqu y2, 10 * 16(rio); \ + vmovdqu y3, 11 * 16(rio); \ + vmovdqu y4, 12 * 16(rio); \ + vmovdqu y5, 13 * 16(rio); \ + vmovdqu y6, 14 * 16(rio); \ + vmovdqu y7, 15 * 16(rio); + + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); + +.Lpack_bswap: + .long 0x00010203 + .long 0x04050607 + .long 0x80808080 + .long 0x80808080 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s1: + .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 + .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 +.Lpost_tf_hi_s2: + .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 + .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 + +/* + * post-SubByte transform + * + * post-lookup for sbox3: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) >>> 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s3: + .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 + .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 +.Lpost_tf_hi_s3: + .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 + .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 +.align 4 +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text + +.align 8 +SYM_FUNC_START_LOCAL(__camellia_enc_blk16) + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 256 bytes + * %xmm0..%xmm15: 16 plaintext blocks + * output: + * %xmm0..%xmm15: 16 encrypted blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + FRAME_BEGIN + + leaq 8 * 16(%rax), %rcx; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 0); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX), + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 8); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX), + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 16); + + movl $24, %r8d; + cmpl $16, key_length(CTX); + jne .Lenc_max32; + +.Lenc_done: + /* load CD for output */ + vmovdqu 0 * 16(%rcx), %xmm8; + vmovdqu 1 * 16(%rcx), %xmm9; + vmovdqu 2 * 16(%rcx), %xmm10; + vmovdqu 3 * 16(%rcx), %xmm11; + vmovdqu 4 * 16(%rcx), %xmm12; + vmovdqu 5 * 16(%rcx), %xmm13; + vmovdqu 6 * 16(%rcx), %xmm14; + vmovdqu 7 * 16(%rcx), %xmm15; + + outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); + + FRAME_END + RET; + +.align 8 +.Lenc_max32: + movl $32, %r8d; + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX), + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX)); + + enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 24); + + jmp .Lenc_done; +SYM_FUNC_END(__camellia_enc_blk16) + +.align 8 +SYM_FUNC_START_LOCAL(__camellia_dec_blk16) + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 256 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %xmm0..%xmm15: 16 encrypted blocks + * output: + * %xmm0..%xmm15: 16 plaintext blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + FRAME_BEGIN + + leaq 8 * 16(%rax), %rcx; + + inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx); + + cmpl $32, %r8d; + je .Ldec_max32; + +.Ldec_max24: + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 16); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX), + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX)); + + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 8); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX), + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX)); + + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 0); + + /* load CD for output */ + vmovdqu 0 * 16(%rcx), %xmm8; + vmovdqu 1 * 16(%rcx), %xmm9; + vmovdqu 2 * 16(%rcx), %xmm10; + vmovdqu 3 * 16(%rcx), %xmm11; + vmovdqu 4 * 16(%rcx), %xmm12; + vmovdqu 5 * 16(%rcx), %xmm13; + vmovdqu 6 * 16(%rcx), %xmm14; + vmovdqu 7 * 16(%rcx), %xmm15; + + outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); + + FRAME_END + RET; + +.align 8 +.Ldec_max32: + dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 24); + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX), + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX)); + + jmp .Ldec_max24; +SYM_FUNC_END(__camellia_dec_blk16) + +SYM_FUNC_START(camellia_ecb_enc_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + */ + FRAME_BEGIN + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX)); + + /* now dst can be used as temporary buffer (even in src == dst case) */ + movq %rsi, %rax; + + call __camellia_enc_blk16; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + FRAME_END + RET; +SYM_FUNC_END(camellia_ecb_enc_16way) + +SYM_FUNC_START(camellia_ecb_dec_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + */ + FRAME_BEGIN + + cmpl $16, key_length(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX, %r8, 8)); + + /* now dst can be used as temporary buffer (even in src == dst case) */ + movq %rsi, %rax; + + call __camellia_dec_blk16; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + FRAME_END + RET; +SYM_FUNC_END(camellia_ecb_dec_16way) + +SYM_FUNC_START(camellia_cbc_dec_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + */ + FRAME_BEGIN + + cmpl $16, key_length(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX, %r8, 8)); + + /* + * dst might still be in-use (in case dst == src), so use stack for + * temporary storage. + */ + subq $(16 * 16), %rsp; + movq %rsp, %rax; + + call __camellia_dec_blk16; + + addq $(16 * 16), %rsp; + + vpxor (0 * 16)(%rdx), %xmm6, %xmm6; + vpxor (1 * 16)(%rdx), %xmm5, %xmm5; + vpxor (2 * 16)(%rdx), %xmm4, %xmm4; + vpxor (3 * 16)(%rdx), %xmm3, %xmm3; + vpxor (4 * 16)(%rdx), %xmm2, %xmm2; + vpxor (5 * 16)(%rdx), %xmm1, %xmm1; + vpxor (6 * 16)(%rdx), %xmm0, %xmm0; + vpxor (7 * 16)(%rdx), %xmm15, %xmm15; + vpxor (8 * 16)(%rdx), %xmm14, %xmm14; + vpxor (9 * 16)(%rdx), %xmm13, %xmm13; + vpxor (10 * 16)(%rdx), %xmm12, %xmm12; + vpxor (11 * 16)(%rdx), %xmm11, %xmm11; + vpxor (12 * 16)(%rdx), %xmm10, %xmm10; + vpxor (13 * 16)(%rdx), %xmm9, %xmm9; + vpxor (14 * 16)(%rdx), %xmm8, %xmm8; + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + FRAME_END + RET; +SYM_FUNC_END(camellia_cbc_dec_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S new file mode 100644 index 000000000..0e4e9abbf --- /dev/null +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -0,0 +1,1051 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * x86_64/AVX2/AES-NI assembler implementation of Camellia + * + * Copyright © 2013 Jussi Kivilinna + */ + +#include +#include + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct camellia_ctx: */ +#define key_table 0 +#define key_length CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %r8 + +/********************************************************************** + helper macros + **********************************************************************/ +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define ymm0_x xmm0 +#define ymm1_x xmm1 +#define ymm2_x xmm2 +#define ymm3_x xmm3 +#define ymm4_x xmm4 +#define ymm5_x xmm5 +#define ymm6_x xmm6 +#define ymm7_x xmm7 +#define ymm8_x xmm8 +#define ymm9_x xmm9 +#define ymm10_x xmm10 +#define ymm11_x xmm11 +#define ymm12_x xmm12 +#define ymm13_x xmm13 +#define ymm14_x xmm14 +#define ymm15_x xmm15 + +/********************************************************************** + 32-way camellia + **********************************************************************/ + +/* + * IN: + * x0..x7: byte-sliced AB state + * mem_cd: register pointer storing CD state + * key: index for key material + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ + t7, mem_cd, key) \ + /* \ + * S-function with AES subbytes \ + */ \ + vbroadcasti128 .Linv_shift_row, t4; \ + vpbroadcastd .L0f0f0f0f, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4, t3; \ + \ + /* AES inverse shift rows */ \ + vpshufb t4, x0, x0; \ + vpshufb t4, x7, x7; \ + vpshufb t4, x3, x3; \ + vpshufb t4, x6, x6; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ + \ + /* prefilter sboxes 1, 2 and 3 */ \ + /* prefilter sbox 4 */ \ + filter_8bit(x0, t5, t6, t7, t4); \ + filter_8bit(x7, t5, t6, t7, t4); \ + vextracti128 $1, x0, t0##_x; \ + vextracti128 $1, x7, t1##_x; \ + filter_8bit(x3, t2, t3, t7, t4); \ + filter_8bit(x6, t2, t3, t7, t4); \ + vextracti128 $1, x3, t3##_x; \ + vextracti128 $1, x6, t2##_x; \ + filter_8bit(x2, t5, t6, t7, t4); \ + filter_8bit(x5, t5, t6, t7, t4); \ + filter_8bit(x1, t5, t6, t7, t4); \ + filter_8bit(x4, t5, t6, t7, t4); \ + \ + vpxor t4##_x, t4##_x, t4##_x; \ + \ + /* AES subbytes + AES shift rows */ \ + vextracti128 $1, x2, t6##_x; \ + vextracti128 $1, x5, t5##_x; \ + vaesenclast t4##_x, x0##_x, x0##_x; \ + vaesenclast t4##_x, t0##_x, t0##_x; \ + vinserti128 $1, t0##_x, x0, x0; \ + vaesenclast t4##_x, x7##_x, x7##_x; \ + vaesenclast t4##_x, t1##_x, t1##_x; \ + vinserti128 $1, t1##_x, x7, x7; \ + vaesenclast t4##_x, x3##_x, x3##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x3, x3; \ + vaesenclast t4##_x, x6##_x, x6##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x6, x6; \ + vextracti128 $1, x1, t3##_x; \ + vextracti128 $1, x4, t2##_x; \ + vbroadcasti128 .Lpost_tf_lo_s1, t0; \ + vbroadcasti128 .Lpost_tf_hi_s1, t1; \ + vaesenclast t4##_x, x2##_x, x2##_x; \ + vaesenclast t4##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + vaesenclast t4##_x, x5##_x, x5##_x; \ + vaesenclast t4##_x, t5##_x, t5##_x; \ + vinserti128 $1, t5##_x, x5, x5; \ + vaesenclast t4##_x, x1##_x, x1##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x1, x1; \ + vaesenclast t4##_x, x4##_x, x4##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x4, x4; \ + \ + /* postfilter sboxes 1 and 4 */ \ + vbroadcasti128 .Lpost_tf_lo_s3, t2; \ + vbroadcasti128 .Lpost_tf_hi_s3, t3; \ + filter_8bit(x0, t0, t1, t7, t6); \ + filter_8bit(x7, t0, t1, t7, t6); \ + filter_8bit(x3, t0, t1, t7, t6); \ + filter_8bit(x6, t0, t1, t7, t6); \ + \ + /* postfilter sbox 3 */ \ + vbroadcasti128 .Lpost_tf_lo_s2, t4; \ + vbroadcasti128 .Lpost_tf_hi_s2, t5; \ + filter_8bit(x2, t2, t3, t7, t6); \ + filter_8bit(x5, t2, t3, t7, t6); \ + \ + vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \ + \ + /* postfilter sbox 2 */ \ + filter_8bit(x1, t4, t5, t7, t2); \ + filter_8bit(x4, t4, t5, t7, t2); \ + vpxor t7, t7, t7; \ + \ + vpsrldq $1, t0, t1; \ + vpsrldq $2, t0, t2; \ + vpshufb t7, t1, t1; \ + vpsrldq $3, t0, t3; \ + \ + /* P-function */ \ + vpxor x5, x0, x0; \ + vpxor x6, x1, x1; \ + vpxor x7, x2, x2; \ + vpxor x4, x3, x3; \ + \ + vpshufb t7, t2, t2; \ + vpsrldq $4, t0, t4; \ + vpshufb t7, t3, t3; \ + vpsrldq $5, t0, t5; \ + vpshufb t7, t4, t4; \ + \ + vpxor x2, x4, x4; \ + vpxor x3, x5, x5; \ + vpxor x0, x6, x6; \ + vpxor x1, x7, x7; \ + \ + vpsrldq $6, t0, t6; \ + vpshufb t7, t5, t5; \ + vpshufb t7, t6, t6; \ + \ + vpxor x7, x0, x0; \ + vpxor x4, x1, x1; \ + vpxor x5, x2, x2; \ + vpxor x6, x3, x3; \ + \ + vpxor x3, x4, x4; \ + vpxor x0, x5, x5; \ + vpxor x1, x6, x6; \ + vpxor x2, x7, x7; /* note: high and low parts swapped */ \ + \ + /* Add key material and result to CD (x becomes new CD) */ \ + \ + vpxor t6, x1, x1; \ + vpxor 5 * 32(mem_cd), x1, x1; \ + \ + vpsrldq $7, t0, t6; \ + vpshufb t7, t0, t0; \ + vpshufb t7, t6, t7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 32(mem_cd), x0, x0; \ + \ + vpxor t5, x2, x2; \ + vpxor 6 * 32(mem_cd), x2, x2; \ + \ + vpxor t4, x3, x3; \ + vpxor 7 * 32(mem_cd), x3, x3; \ + \ + vpxor t3, x4, x4; \ + vpxor 0 * 32(mem_cd), x4, x4; \ + \ + vpxor t2, x5, x5; \ + vpxor 1 * 32(mem_cd), x5, x5; \ + \ + vpxor t1, x6, x6; \ + vpxor 2 * 32(mem_cd), x6, x6; \ + \ + vpxor t0, x7, x7; \ + vpxor 3 * 32(mem_cd), x7, x7; + +/* + * Size optimization... with inlined roundsm32 binary would be over 5 times + * larger and would only marginally faster. + */ +.align 8 +SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, + %rcx, (%r9)); + RET; +SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) + +.align 8 +SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, + %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, + %rax, (%r9)); + RET; +SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) + +/* + * IN/OUT: + * x0..x7: byte-sliced AB state preloaded + * mem_ab: byte-sliced AB state in memory + * mem_cb: byte-sliced CD state in memory + */ +#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ + leaq (key_table + (i) * 8)(CTX), %r9; \ + call roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \ + \ + vmovdqu x0, 4 * 32(mem_cd); \ + vmovdqu x1, 5 * 32(mem_cd); \ + vmovdqu x2, 6 * 32(mem_cd); \ + vmovdqu x3, 7 * 32(mem_cd); \ + vmovdqu x4, 0 * 32(mem_cd); \ + vmovdqu x5, 1 * 32(mem_cd); \ + vmovdqu x6, 2 * 32(mem_cd); \ + vmovdqu x7, 3 * 32(mem_cd); \ + \ + leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \ + call roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \ + \ + store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); + +#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ + +#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ + /* Store new AB state */ \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); + +#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); + +#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); + +/* + * IN: + * v0..3: byte-sliced 32-bit integers + * OUT: + * v0..3: (IN <<< 1) + */ +#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \ + vpcmpgtb v0, zero, t0; \ + vpaddb v0, v0, v0; \ + vpabsb t0, t0; \ + \ + vpcmpgtb v1, zero, t1; \ + vpaddb v1, v1, v1; \ + vpabsb t1, t1; \ + \ + vpcmpgtb v2, zero, t2; \ + vpaddb v2, v2, v2; \ + vpabsb t2, t2; \ + \ + vpor t0, v1, v1; \ + \ + vpcmpgtb v3, zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * r: byte-sliced AB state in memory + * l: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ + vpxor tt0, tt0, tt0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ + vmovdqu l4, 4 * 32(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 32(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 32(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 32(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 32(r), t0, t0; \ + vpor 5 * 32(r), t1, t1; \ + vpor 6 * 32(r), t2, t2; \ + vpor 7 * 32(r), t3, t3; \ + \ + vpxor 0 * 32(r), t0, t0; \ + vpxor 1 * 32(r), t1, t1; \ + vpxor 2 * 32(r), t2, t2; \ + vpxor 3 * 32(r), t3, t3; \ + vmovdqu t0, 0 * 32(r); \ + vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 1 * 32(r); \ + vmovdqu t2, 2 * 32(r); \ + vmovdqu t3, 3 * 32(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 32(r), t0, t0; \ + vpand 1 * 32(r), t1, t1; \ + vpand 2 * 32(r), t2, t2; \ + vpand 3 * 32(r), t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 32(r), t0, t0; \ + vpxor 5 * 32(r), t1, t1; \ + vpxor 6 * 32(r), t2, t2; \ + vpxor 7 * 32(r), t3, t3; \ + vmovdqu t0, 4 * 32(r); \ + vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 5 * 32(r); \ + vmovdqu t2, 6 * 32(r); \ + vmovdqu t3, 7 * 32(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 32(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 32(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 32(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 32(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ + a3, b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti128 .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap, x0, x0; \ + \ + vpxor 0 * 32(rio), x0, y7; \ + vpxor 1 * 32(rio), x0, y6; \ + vpxor 2 * 32(rio), x0, y5; \ + vpxor 3 * 32(rio), x0, y4; \ + vpxor 4 * 32(rio), x0, y3; \ + vpxor 5 * 32(rio), x0, y2; \ + vpxor 6 * 32(rio), x0, y1; \ + vpxor 7 * 32(rio), x0, y0; \ + vpxor 8 * 32(rio), x0, x7; \ + vpxor 9 * 32(rio), x0, x6; \ + vpxor 10 * 32(rio), x0, x5; \ + vpxor 11 * 32(rio), x0, x4; \ + vpxor 12 * 32(rio), x0, x3; \ + vpxor 13 * 32(rio), x0, x2; \ + vpxor 14 * 32(rio), x0, x1; \ + vpxor 15 * 32(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ + y4, y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu y0, 0 * 32(mem_cd); \ + vmovdqu y1, 1 * 32(mem_cd); \ + vmovdqu y2, 2 * 32(mem_cd); \ + vmovdqu y3, 3 * 32(mem_cd); \ + vmovdqu y4, 4 * 32(mem_cd); \ + vmovdqu y5, 5 * 32(mem_cd); \ + vmovdqu y6, 6 * 32(mem_cd); \ + vmovdqu y7, 7 * 32(mem_cd); + +/* de-byteslice, apply post-whitening and store blocks */ +#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ + y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 32(rio); \ + vmovdqu x1, 1 * 32(rio); \ + vmovdqu x2, 2 * 32(rio); \ + vmovdqu x3, 3 * 32(rio); \ + vmovdqu x4, 4 * 32(rio); \ + vmovdqu x5, 5 * 32(rio); \ + vmovdqu x6, 6 * 32(rio); \ + vmovdqu x7, 7 * 32(rio); \ + vmovdqu y0, 8 * 32(rio); \ + vmovdqu y1, 9 * 32(rio); \ + vmovdqu y2, 10 * 32(rio); \ + vmovdqu y3, 11 * 32(rio); \ + vmovdqu y4, 12 * 32(rio); \ + vmovdqu y5, 13 * 32(rio); \ + vmovdqu y6, 14 * 32(rio); \ + vmovdqu y7, 15 * 32(rio); + + +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + +.section .rodata.cst32.pack_bswap, "aM", @progbits, 32 +.align 32 +.Lpack_bswap: + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s1: + .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 + .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 +.Lpost_tf_hi_s2: + .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 + .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 + +/* + * post-SubByte transform + * + * post-lookup for sbox3: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) >>> 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s3: + .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 + .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 +.Lpost_tf_hi_s3: + .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 + .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text + +.align 8 +SYM_FUNC_START_LOCAL(__camellia_enc_blk32) + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %ymm0..%ymm15: 32 plaintext blocks + * output: + * %ymm0..%ymm15: 32 encrypted blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + FRAME_BEGIN + + leaq 8 * 32(%rax), %rcx; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX), + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 8); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX), + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 16); + + movl $24, %r8d; + cmpl $16, key_length(CTX); + jne .Lenc_max32; + +.Lenc_done: + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); + + FRAME_END + RET; + +.align 8 +.Lenc_max32: + movl $32, %r8d; + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX), + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX)); + + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 24); + + jmp .Lenc_done; +SYM_FUNC_END(__camellia_enc_blk32) + +.align 8 +SYM_FUNC_START_LOCAL(__camellia_dec_blk32) + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %ymm0..%ymm15: 16 encrypted blocks + * output: + * %ymm0..%ymm15: 16 plaintext blocks, order swapped: + * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + FRAME_BEGIN + + leaq 8 * 32(%rax), %rcx; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + + cmpl $32, %r8d; + je .Ldec_max32; + +.Ldec_max24: + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 16); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (16) * 8) + 8)(CTX), + ((key_table + (16) * 8) + 12)(CTX), + ((key_table + (16) * 8) + 0)(CTX), + ((key_table + (16) * 8) + 4)(CTX)); + + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 8); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (8) * 8) + 8)(CTX), + ((key_table + (8) * 8) + 12)(CTX), + ((key_table + (8) * 8) + 0)(CTX), + ((key_table + (8) * 8) + 4)(CTX)); + + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); + + FRAME_END + RET; + +.align 8 +.Ldec_max32: + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 24); + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table + (24) * 8) + 8)(CTX), + ((key_table + (24) * 8) + 12)(CTX), + ((key_table + (24) * 8) + 0)(CTX), + ((key_table + (24) * 8) + 4)(CTX)); + + jmp .Ldec_max24; +SYM_FUNC_END(__camellia_dec_blk32) + +SYM_FUNC_START(camellia_ecb_enc_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + */ + FRAME_BEGIN + + vzeroupper; + + inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx, (key_table)(CTX)); + + /* now dst can be used as temporary buffer (even in src == dst case) */ + movq %rsi, %rax; + + call __camellia_enc_blk32; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroupper; + + FRAME_END + RET; +SYM_FUNC_END(camellia_ecb_enc_32way) + +SYM_FUNC_START(camellia_ecb_dec_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + */ + FRAME_BEGIN + + vzeroupper; + + cmpl $16, key_length(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx, (key_table)(CTX, %r8, 8)); + + /* now dst can be used as temporary buffer (even in src == dst case) */ + movq %rsi, %rax; + + call __camellia_dec_blk32; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroupper; + + FRAME_END + RET; +SYM_FUNC_END(camellia_ecb_dec_32way) + +SYM_FUNC_START(camellia_cbc_dec_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + */ + FRAME_BEGIN + subq $(16 * 32), %rsp; + + vzeroupper; + + cmpl $16, key_length(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx, (key_table)(CTX, %r8, 8)); + + cmpq %rsi, %rdx; + je .Lcbc_dec_use_stack; + + /* dst can be used as temporary storage, src is not overwritten. */ + movq %rsi, %rax; + jmp .Lcbc_dec_continue; + +.Lcbc_dec_use_stack: + /* + * dst still in-use (because dst == src), so use stack for temporary + * storage. + */ + movq %rsp, %rax; + +.Lcbc_dec_continue: + call __camellia_dec_blk32; + + vmovdqu %ymm7, (%rax); + vpxor %ymm7, %ymm7, %ymm7; + vinserti128 $1, (%rdx), %ymm7, %ymm7; + vpxor (%rax), %ymm7, %ymm7; + vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6; + vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5; + vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4; + vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3; + vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2; + vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1; + vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0; + vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15; + vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14; + vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13; + vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12; + vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11; + vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10; + vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9; + vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8; + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroupper; + + addq $(16 * 32), %rsp; + FRAME_END + RET; +SYM_FUNC_END(camellia_cbc_dec_32way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S new file mode 100644 index 000000000..347c059f5 --- /dev/null +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -0,0 +1,499 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Camellia Cipher Algorithm (x86_64) + * + * Copyright (C) 2012 Jussi Kivilinna + */ + +#include + +.file "camellia-x86_64-asm_64.S" +.text + +.extern camellia_sp10011110; +.extern camellia_sp22000222; +.extern camellia_sp03303033; +.extern camellia_sp00444404; +.extern camellia_sp02220222; +.extern camellia_sp30333033; +.extern camellia_sp44044404; +.extern camellia_sp11101110; + +#define sp10011110 camellia_sp10011110 +#define sp22000222 camellia_sp22000222 +#define sp03303033 camellia_sp03303033 +#define sp00444404 camellia_sp00444404 +#define sp02220222 camellia_sp02220222 +#define sp30333033 camellia_sp30333033 +#define sp44044404 camellia_sp44044404 +#define sp11101110 camellia_sp11101110 + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct camellia_ctx: */ +#define key_table 0 +#define key_length CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %rsi +#define RIOd %esi + +#define RAB0 %rax +#define RCD0 %rcx +#define RAB1 %rbx +#define RCD1 %rdx + +#define RAB0d %eax +#define RCD0d %ecx +#define RAB1d %ebx +#define RCD1d %edx + +#define RAB0bl %al +#define RCD0bl %cl +#define RAB1bl %bl +#define RCD1bl %dl + +#define RAB0bh %ah +#define RCD0bh %ch +#define RAB1bh %bh +#define RCD1bh %dh + +#define RT0 %rsi +#define RT1 %r12 +#define RT2 %r8 + +#define RT0d %esi +#define RT1d %r12d +#define RT2d %r8d + +#define RT2bl %r8b + +#define RXOR %r9 +#define RR12 %r10 +#define RDST %r11 + +#define RXORd %r9d +#define RXORbl %r9b + +#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $16, ab; \ + xorq T0(, tmp2, 8), dst; \ + xorq T1(, tmp1, 8), dst; + +/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(ab, subkey, cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xorq RT2, cd ## 0; + +#define fls(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \ + orq l ## 0, RT2; \ + shrq $32, RT2; \ + xorq RT2, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \ + andl r ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, r ## 0; + +#define enc_rounds(i) \ + roundsm(RAB, i + 2, RCD); \ + roundsm(RCD, i + 3, RAB); \ + roundsm(RAB, i + 4, RCD); \ + roundsm(RCD, i + 5, RAB); \ + roundsm(RAB, i + 6, RCD); \ + roundsm(RCD, i + 7, RAB); + +#define enc_fls(i) \ + fls(RAB, RCD, i + 0, i + 1); + +#define enc_inpack() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX), RAB0; + +#define enc_outunpack(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); + +#define dec_rounds(i) \ + roundsm(RAB, i + 7, RCD); \ + roundsm(RCD, i + 6, RAB); \ + roundsm(RAB, i + 5, RCD); \ + roundsm(RCD, i + 4, RAB); \ + roundsm(RAB, i + 3, RCD); \ + roundsm(RCD, i + 2, RAB); + +#define dec_fls(i) \ + fls(RAB, RCD, i + 1, i + 0); + +#define dec_inpack(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; + +#define dec_outunpack() \ + xorq key_table(CTX), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); + +SYM_FUNC_START(__camellia_enc_blk) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + movq %r12, RR12; + + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack(); + + enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + movl $24, RT1d; /* max */ + + cmpb $16, key_length(CTX); + je .L__enc_done; + + enc_fls(24); + enc_rounds(24); + movl $32, RT1d; /* max */ + +.L__enc_done: + testb RXORbl, RXORbl; + movq RDST, RIO; + + jnz .L__enc_xor; + + enc_outunpack(mov, RT1); + + movq RR12, %r12; + RET; + +.L__enc_xor: + enc_outunpack(xor, RT1); + + movq RR12, %r12; + RET; +SYM_FUNC_END(__camellia_enc_blk) + +SYM_FUNC_START(camellia_dec_blk) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %r12, RR12; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack(RT2); + + cmpb $24, RT2bl; + je .L__dec_rounds16; + + dec_rounds(24); + dec_fls(24); + +.L__dec_rounds16: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + movq RDST, RIO; + + dec_outunpack(); + + movq RR12, %r12; + RET; +SYM_FUNC_END(camellia_dec_blk) + +/********************************************************************** + 2-way camellia + **********************************************************************/ +#define roundsm2(ab, subkey, cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + xorq RT2, cd ## 1; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \ + xorq RT2, cd ## 0; \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1); + +#define fls2(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \ + andl l ## 1d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, l ## 1; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \ + orq r ## 1, RT0; \ + shrq $32, RT0; \ + xorq RT0, r ## 1; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \ + orq l ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \ + andl r ## 0d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \ + orq l ## 1, RT0; \ + shrq $32, RT0; \ + xorq RT0, l ## 1; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \ + andl r ## 1d, RT1d; \ + roll $1, RT1d; \ + shlq $32, RT1; \ + xorq RT1, r ## 1; + +#define enc_rounds2(i) \ + roundsm2(RAB, i + 2, RCD); \ + roundsm2(RCD, i + 3, RAB); \ + roundsm2(RAB, i + 4, RCD); \ + roundsm2(RCD, i + 5, RAB); \ + roundsm2(RAB, i + 6, RCD); \ + roundsm2(RCD, i + 7, RAB); + +#define enc_fls2(i) \ + fls2(RAB, RCD, i + 0, i + 1); + +#define enc_inpack2() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX), RAB1; + +#define enc_outunpack2(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); \ + \ + xorq key_table(CTX, max, 8), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + op ## q RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + op ## q RAB1, 12*2(RIO); + +#define dec_rounds2(i) \ + roundsm2(RAB, i + 7, RCD); \ + roundsm2(RCD, i + 6, RAB); \ + roundsm2(RAB, i + 5, RCD); \ + roundsm2(RCD, i + 4, RAB); \ + roundsm2(RAB, i + 3, RCD); \ + roundsm2(RCD, i + 2, RAB); + +#define dec_fls2(i) \ + fls2(RAB, RCD, i + 1, i + 0); + +#define dec_inpack2(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX, max, 8), RAB1; + +#define dec_outunpack2() \ + xorq key_table(CTX), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); \ + \ + xorq key_table(CTX), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + movq RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + movq RAB1, 12*2(RIO); + +SYM_FUNC_START(__camellia_enc_blk_2way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + pushq %rbx; + + movq %r12, RR12; + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack2(); + + enc_rounds2(0); + enc_fls2(8); + enc_rounds2(8); + enc_fls2(16); + enc_rounds2(16); + movl $24, RT2d; /* max */ + + cmpb $16, key_length(CTX); + je .L__enc2_done; + + enc_fls2(24); + enc_rounds2(24); + movl $32, RT2d; /* max */ + +.L__enc2_done: + test RXORbl, RXORbl; + movq RDST, RIO; + jnz .L__enc2_xor; + + enc_outunpack2(mov, RT2); + + movq RR12, %r12; + popq %rbx; + RET; + +.L__enc2_xor: + enc_outunpack2(xor, RT2); + + movq RR12, %r12; + popq %rbx; + RET; +SYM_FUNC_END(__camellia_enc_blk_2way) + +SYM_FUNC_START(camellia_dec_blk_2way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %rbx, RXOR; + movq %r12, RR12; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack2(RT2); + + cmpb $24, RT2bl; + je .L__dec2_rounds16; + + dec_rounds2(24); + dec_fls2(24); + +.L__dec2_rounds16: + dec_rounds2(16); + dec_fls2(16); + dec_rounds2(8); + dec_fls2(8); + dec_rounds2(0); + + movq RDST, RIO; + + dec_outunpack2(); + + movq RR12, %r12; + movq RXOR, %rbx; + RET; +SYM_FUNC_END(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/camellia.h b/arch/x86/crypto/camellia.h new file mode 100644 index 000000000..1dcea79e8 --- /dev/null +++ b/arch/x86/crypto/camellia.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_CAMELLIA_H +#define ASM_X86_CAMELLIA_H + +#include +#include +#include + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 +#define CAMELLIA_PARALLEL_BLOCKS 2 + +struct crypto_skcipher; + +struct camellia_ctx { + u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u32 key_length; +}; + +extern int __camellia_setkey(struct camellia_ctx *cctx, + const unsigned char *key, + unsigned int key_len); + +/* regular block cipher functions */ +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); + +/* 2-way parallel cipher functions */ +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); + +/* 16-way parallel cipher functions (avx/aes-ni) */ +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); + +static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, true); +} + +static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, true); +} + +/* glue helpers */ +extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src); + +#endif /* ASM_X86_CAMELLIA_H */ diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c new file mode 100644 index 000000000..e7e4d64e9 --- /dev/null +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia + * + * Copyright © 2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include + +#include "camellia.h" +#include "ecb_cbc_helpers.h" + +#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 +#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 + +/* 32-way AVX2/AES-NI parallel cipher functions */ +asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); + +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_enc_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_dec_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_cbc_dec_32way); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); +} + +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; + +static int __init camellia_aesni_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); +} + +static void __exit camellia_aesni_fini(void) +{ + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); +} + +module_init(camellia_aesni_init); +module_exit(camellia_aesni_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c new file mode 100644 index 000000000..c7ccf63e7 --- /dev/null +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include + +#include "camellia.h" +#include "ecb_cbc_helpers.h" + +#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 + +/* 16-way parallel cipher functions (avx/aes-ni) */ +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way); + +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); + +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); + +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); +} + +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; + +static int __init camellia_aesni_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); +} + +static void __exit camellia_aesni_fini(void) +{ + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); +} + +module_init(camellia_aesni_init); +module_exit(camellia_aesni_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c new file mode 100644 index 000000000..d45e9c0c4 --- /dev/null +++ b/arch/x86/crypto/camellia_glue.c @@ -0,0 +1,1417 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for assembler optimized version of Camellia + * + * Copyright (c) 2012 Jussi Kivilinna + * + * Camellia parts based on code by: + * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) + */ + +#include +#include +#include +#include +#include +#include + +#include "camellia.h" +#include "ecb_cbc_helpers.h" + +/* regular block cipher functions */ +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); +EXPORT_SYMBOL_GPL(__camellia_enc_blk); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_dec_blk); + +/* 2-way parallel cipher functions */ +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +/* camellia sboxes */ +__visible const u64 camellia_sp10011110[256] = { + 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, + 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, + 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, + 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, + 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, + 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, + 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, + 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, + 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, + 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, + 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, + 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, + 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, + 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, + 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, + 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, + 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, + 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, + 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, + 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, + 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, + 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, + 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, + 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, + 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, + 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, + 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, + 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, + 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, + 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, + 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, + 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, + 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, + 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, + 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, + 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, + 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, + 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, + 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, + 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, + 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, + 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, + 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, + 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, + 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, + 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, + 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, + 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, + 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, + 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, + 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, + 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, + 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, + 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, + 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, + 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, + 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, + 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, + 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, + 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, + 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, + 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, + 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, + 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, + 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, + 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, + 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, + 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, + 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, + 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, + 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, + 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, + 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, + 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, + 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, + 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, + 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, + 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, + 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, + 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, + 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, + 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, + 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, + 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, + 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, + 0x9e00009e9e9e9e00ULL, +}; + +__visible const u64 camellia_sp22000222[256] = { + 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, + 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, + 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, + 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, + 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, + 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, + 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, + 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, + 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, + 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, + 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, + 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, + 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, + 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, + 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, + 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, + 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, + 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, + 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, + 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, + 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, + 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, + 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, + 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, + 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, + 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, + 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, + 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, + 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, + 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, + 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, + 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, + 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, + 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, + 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, + 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, + 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, + 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, + 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, + 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, + 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, + 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, + 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, + 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, + 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, + 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, + 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, + 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, + 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, + 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, + 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, + 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, + 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, + 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, + 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, + 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, + 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, + 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, + 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, + 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, + 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, + 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, + 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, + 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, + 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, + 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, + 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, + 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, + 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, + 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, + 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, + 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, + 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, + 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, + 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, + 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, + 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, + 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, + 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, + 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, + 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, + 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, + 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, + 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, + 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, + 0x3d3d0000003d3d3dULL, +}; + +__visible const u64 camellia_sp03303033[256] = { + 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, + 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, + 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, + 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, + 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, + 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, + 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, + 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, + 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, + 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, + 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, + 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, + 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, + 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, + 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, + 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, + 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, + 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, + 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, + 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, + 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, + 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, + 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, + 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, + 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, + 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, + 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, + 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, + 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, + 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, + 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, + 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, + 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, + 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, + 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, + 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, + 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, + 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, + 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, + 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, + 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, + 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, + 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, + 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, + 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, + 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, + 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, + 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, + 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, + 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, + 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, + 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, + 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, + 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, + 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, + 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, + 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, + 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, + 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, + 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, + 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, + 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, + 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, + 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, + 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, + 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, + 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, + 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, + 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, + 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, + 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, + 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, + 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, + 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, + 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, + 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, + 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, + 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, + 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, + 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, + 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, + 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, + 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, + 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, + 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, + 0x004f4f004f004f4fULL, +}; + +__visible const u64 camellia_sp00444404[256] = { + 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, + 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, + 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, + 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, + 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, + 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, + 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, + 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, + 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, + 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, + 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, + 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, + 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, + 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, + 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, + 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, + 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, + 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, + 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, + 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, + 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, + 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, + 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, + 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, + 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, + 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, + 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, + 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, + 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, + 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, + 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, + 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, + 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, + 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, + 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, + 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, + 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, + 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, + 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, + 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, + 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, + 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, + 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, + 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, + 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, + 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, + 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, + 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, + 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, + 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, + 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, + 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, + 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, + 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, + 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, + 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, + 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, + 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, + 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, + 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, + 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, + 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, + 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, + 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, + 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, + 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, + 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, + 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, + 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, + 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, + 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, + 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, + 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, + 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, + 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, + 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, + 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, + 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, + 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, + 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, + 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, + 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, + 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, + 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, + 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, + 0x00009e9e9e9e009eULL, +}; + +__visible const u64 camellia_sp02220222[256] = { + 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, + 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, + 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, + 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, + 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, + 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, + 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, + 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, + 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, + 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, + 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, + 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, + 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, + 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, + 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, + 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, + 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, + 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, + 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, + 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, + 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, + 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, + 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, + 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, + 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, + 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, + 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, + 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, + 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, + 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, + 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, + 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, + 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, + 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, + 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, + 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, + 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, + 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, + 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, + 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, + 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, + 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, + 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, + 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, + 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, + 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, + 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, + 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, + 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, + 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, + 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, + 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, + 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, + 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, + 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, + 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, + 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, + 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, + 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, + 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, + 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, + 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, + 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, + 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, + 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, + 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, + 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, + 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, + 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, + 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, + 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, + 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, + 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, + 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, + 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, + 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, + 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, + 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, + 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, + 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, + 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, + 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, + 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, + 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, + 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, + 0x003d3d3d003d3d3dULL, +}; + +__visible const u64 camellia_sp30333033[256] = { + 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, + 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, + 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, + 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, + 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, + 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, + 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, + 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, + 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, + 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, + 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, + 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, + 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, + 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, + 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, + 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, + 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, + 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, + 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, + 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, + 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, + 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, + 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, + 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, + 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, + 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, + 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, + 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, + 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, + 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, + 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, + 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, + 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, + 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, + 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, + 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, + 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, + 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, + 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, + 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, + 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, + 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, + 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, + 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, + 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, + 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, + 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, + 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, + 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, + 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, + 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, + 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, + 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, + 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, + 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, + 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, + 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, + 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, + 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, + 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, + 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, + 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, + 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, + 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, + 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, + 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, + 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, + 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, + 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, + 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, + 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, + 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, + 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, + 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, + 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, + 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, + 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, + 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, + 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, + 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, + 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, + 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, + 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, + 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, + 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, + 0x4f004f4f4f004f4fULL, +}; + +__visible const u64 camellia_sp44044404[256] = { + 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, + 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, + 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, + 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, + 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, + 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, + 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, + 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, + 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, + 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, + 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, + 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, + 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, + 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, + 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, + 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, + 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, + 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, + 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, + 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, + 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, + 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, + 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, + 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, + 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, + 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, + 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, + 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, + 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, + 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, + 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, + 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, + 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, + 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, + 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, + 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, + 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, + 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, + 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, + 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, + 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, + 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, + 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, + 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, + 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, + 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, + 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, + 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, + 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, + 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, + 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, + 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, + 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, + 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, + 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, + 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, + 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, + 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, + 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, + 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, + 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, + 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, + 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, + 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, + 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, + 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, + 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, + 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, + 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, + 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, + 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, + 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, + 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, + 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, + 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, + 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, + 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, + 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, + 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, + 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, + 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, + 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, + 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, + 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, + 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, + 0x9e9e009e9e9e009eULL, +}; + +__visible const u64 camellia_sp11101110[256] = { + 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, + 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, + 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, + 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, + 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, + 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, + 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, + 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, + 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, + 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, + 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, + 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, + 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, + 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, + 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, + 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, + 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, + 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, + 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, + 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, + 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, + 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, + 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, + 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, + 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, + 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, + 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, + 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, + 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, + 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, + 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, + 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, + 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, + 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, + 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, + 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, + 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, + 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, + 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, + 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, + 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, + 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, + 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, + 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, + 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, + 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, + 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, + 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, + 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, + 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, + 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, + 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, + 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, + 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, + 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, + 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, + 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, + 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, + 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, + 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, + 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, + 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, + 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, + 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, + 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, + 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, + 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, + 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, + 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, + 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, + 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, + 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, + 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, + 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, + 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, + 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, + 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, + 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, + 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, + 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, + 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, + 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, + 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, + 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, + 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, + 0x9e9e9e009e9e9e00ULL, +}; + +/* key constants */ +#define CAMELLIA_SIGMA1L (0xA09E667FL) +#define CAMELLIA_SIGMA1R (0x3BCC908BL) +#define CAMELLIA_SIGMA2L (0xB67AE858L) +#define CAMELLIA_SIGMA2R (0x4CAA73B2L) +#define CAMELLIA_SIGMA3L (0xC6EF372FL) +#define CAMELLIA_SIGMA3R (0xE94F82BEL) +#define CAMELLIA_SIGMA4L (0x54FF53A5L) +#define CAMELLIA_SIGMA4R (0xF1D36F1CL) +#define CAMELLIA_SIGMA5L (0x10E527FAL) +#define CAMELLIA_SIGMA5R (0xDE682D1DL) +#define CAMELLIA_SIGMA6L (0xB05688C2L) +#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) + +/* macros */ +#define ROLDQ(l, r, bits) ({ \ + u64 t = l; \ + l = (l << bits) | (r >> (64 - bits)); \ + r = (r << bits) | (t >> (64 - bits)); \ +}) + +#define CAMELLIA_F(x, kl, kr, y) ({ \ + u64 ii = x ^ (((u64)kl << 32) | kr); \ + y = camellia_sp11101110[(uint8_t)ii]; \ + y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp30333033[(uint8_t)ii]; \ + y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp00444404[(uint8_t)ii]; \ + y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp22000222[(uint8_t)ii]; \ + y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ + y = ror64(y, 32); \ +}) + +#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) + +static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max) +{ + u64 kw4, tt; + u32 dw, tl, tr; + + /* absorb kw2 to other subkeys */ + /* round 2 */ + subRL[3] ^= subRL[1]; + /* round 4 */ + subRL[5] ^= subRL[1]; + /* round 6 */ + subRL[7] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; + /* modified for FLinv(kl2) */ + dw = (subRL[1] & subRL[9]) >> 32; + subRL[1] ^= rol32(dw, 1); + + /* round 8 */ + subRL[11] ^= subRL[1]; + /* round 10 */ + subRL[13] ^= subRL[1]; + /* round 12 */ + subRL[15] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; + /* modified for FLinv(kl4) */ + dw = (subRL[1] & subRL[17]) >> 32; + subRL[1] ^= rol32(dw, 1); + + /* round 14 */ + subRL[19] ^= subRL[1]; + /* round 16 */ + subRL[21] ^= subRL[1]; + /* round 18 */ + subRL[23] ^= subRL[1]; + + if (max == 24) { + /* kw3 */ + subRL[24] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[25]; + } else { + subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; + /* modified for FLinv(kl6) */ + dw = (subRL[1] & subRL[25]) >> 32; + subRL[1] ^= rol32(dw, 1); + + /* round 20 */ + subRL[27] ^= subRL[1]; + /* round 22 */ + subRL[29] ^= subRL[1]; + /* round 24 */ + subRL[31] ^= subRL[1]; + /* kw3 */ + subRL[32] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[33]; + /* round 23 */ + subRL[30] ^= kw4; + /* round 21 */ + subRL[28] ^= kw4; + /* round 19 */ + subRL[26] ^= kw4; + + kw4 ^= (kw4 & ~subRL[24]) << 32; + /* modified for FL(kl5) */ + dw = (kw4 & subRL[24]) >> 32; + kw4 ^= rol32(dw, 1); + } + + /* round 17 */ + subRL[22] ^= kw4; + /* round 15 */ + subRL[20] ^= kw4; + /* round 13 */ + subRL[18] ^= kw4; + + kw4 ^= (kw4 & ~subRL[16]) << 32; + /* modified for FL(kl3) */ + dw = (kw4 & subRL[16]) >> 32; + kw4 ^= rol32(dw, 1); + + /* round 11 */ + subRL[14] ^= kw4; + /* round 9 */ + subRL[12] ^= kw4; + /* round 7 */ + subRL[10] ^= kw4; + + kw4 ^= (kw4 & ~subRL[8]) << 32; + /* modified for FL(kl1) */ + dw = (kw4 & subRL[8]) >> 32; + kw4 ^= rol32(dw, 1); + + /* round 5 */ + subRL[6] ^= kw4; + /* round 3 */ + subRL[4] ^= kw4; + /* round 1 */ + subRL[2] ^= kw4; + /* kw1 */ + subRL[0] ^= kw4; + + /* key XOR is end of F-function */ + SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ + SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ + SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ + SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ + SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ + SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ + + tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); + dw = tl & (subRL[8] >> 32); /* FL(kl1) */ + tr = subRL[10] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ + SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ + SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ + + tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); + dw = tl & (subRL[9] >> 32); /* FLinv(kl2) */ + tr = subRL[7] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ + SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ + SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ + SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ + SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ + + tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); + dw = tl & (subRL[16] >> 32); /* FL(kl3) */ + tr = subRL[18] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ + SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ + SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ + + tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); + dw = tl & (subRL[17] >> 32); /* FLinv(kl4) */ + tr = subRL[15] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ + SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ + SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ + SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ + SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ + + if (max == 24) { + SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ + } else { + tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); + dw = tl & (subRL[24] >> 32); /* FL(kl5) */ + tr = subRL[26] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ + SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ + + tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); + dw = tl & (subRL[25] >> 32); /* FLinv(kl6) */ + tr = subRL[23] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ + SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ + SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ + SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ + SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ + SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ + SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ + } +} + +static void camellia_setup128(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr, ww; + u64 subRL[26]; + + /** + * k == kl || kr (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 8); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + + /* rotation left shift 15bit */ + ROLDQ(kl, kr, 15); + + /* k3 */ + subRL[4] = kl; + /* k4 */ + subRL[5] = kr; + + /* rotation left shift 15+30bit */ + ROLDQ(kl, kr, 30); + + /* k7 */ + subRL[10] = kl; + /* k8 */ + subRL[11] = kr; + + /* rotation left shift 15+30+15bit */ + ROLDQ(kl, kr, 15); + + /* k10 */ + subRL[13] = kr; + /* rotation left shift 15+30+15+17 bit */ + ROLDQ(kl, kr, 17); + + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + + /* rotation left shift 15+30+15+17+17 bit */ + ROLDQ(kl, kr, 17); + + /* k13 */ + subRL[18] = kl; + /* k14 */ + subRL[19] = kr; + + /* rotation left shift 15+30+15+17+17+17 bit */ + ROLDQ(kl, kr, 17); + + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + + /* generate KA */ + kl = subRL[0]; + kr = subRL[1]; + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + + /* current status == (kll, klr, w0, w1) */ + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KA dependent subkeys */ + /* k1, k2 */ + subRL[2] = kl; + subRL[3] = kr; + ROLDQ(kl, kr, 15); + /* k5,k6 */ + subRL[6] = kl; + subRL[7] = kr; + ROLDQ(kl, kr, 15); + /* kl1, kl2 */ + subRL[8] = kl; + subRL[9] = kr; + ROLDQ(kl, kr, 15); + /* k9 */ + subRL[12] = kl; + ROLDQ(kl, kr, 15); + /* k11, k12 */ + subRL[14] = kl; + subRL[15] = kr; + ROLDQ(kl, kr, 34); + /* k15, k16 */ + subRL[20] = kl; + subRL[21] = kr; + ROLDQ(kl, kr, 17); + /* kw3, kw4 */ + subRL[24] = kl; + subRL[25] = kr; + + camellia_setup_tail(subkey, subRL, 24); +} + +static void camellia_setup256(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr; /* left half of key */ + u64 krl, krr; /* right half of key */ + u64 ww; /* temporary variables */ + u64 subRL[34]; + + /** + * key = (kl || kr || krl || krr) (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 8); + krl = get_unaligned_be64(key + 16); + krr = get_unaligned_be64(key + 24); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + ROLDQ(kl, kr, 45); + /* k9 */ + subRL[12] = kl; + /* k10 */ + subRL[13] = kr; + ROLDQ(kl, kr, 15); + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + ROLDQ(kl, kr, 17); + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + ROLDQ(kl, kr, 34); + /* k23 */ + subRL[30] = kl; + /* k24 */ + subRL[31] = kr; + + /* generate KR dependent subkeys */ + ROLDQ(krl, krr, 15); + /* k3 */ + subRL[4] = krl; + /* k4 */ + subRL[5] = krr; + ROLDQ(krl, krr, 15); + /* kl1 */ + subRL[8] = krl; + /* kl2 */ + subRL[9] = krr; + ROLDQ(krl, krr, 30); + /* k13 */ + subRL[18] = krl; + /* k14 */ + subRL[19] = krr; + ROLDQ(krl, krr, 34); + /* k19 */ + subRL[26] = krl; + /* k20 */ + subRL[27] = krr; + ROLDQ(krl, krr, 34); + + /* generate KA */ + kl = subRL[0] ^ krl; + kr = subRL[1] ^ krr; + + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + kl ^= krl; + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww ^ krr; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KB */ + krl ^= kl; + krr ^= kr; + CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); + krr ^= ww; + CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); + krl ^= ww; + + /* generate KA dependent subkeys */ + ROLDQ(kl, kr, 15); + /* k5 */ + subRL[6] = kl; + /* k6 */ + subRL[7] = kr; + ROLDQ(kl, kr, 30); + /* k11 */ + subRL[14] = kl; + /* k12 */ + subRL[15] = kr; + /* rotation left shift 32bit */ + ROLDQ(kl, kr, 32); + /* kl5 */ + subRL[24] = kl; + /* kl6 */ + subRL[25] = kr; + /* rotation left shift 17 from k11,k12 -> k21,k22 */ + ROLDQ(kl, kr, 17); + /* k21 */ + subRL[28] = kl; + /* k22 */ + subRL[29] = kr; + + /* generate KB dependent subkeys */ + /* k1 */ + subRL[2] = krl; + /* k2 */ + subRL[3] = krr; + ROLDQ(krl, krr, 30); + /* k7 */ + subRL[10] = krl; + /* k8 */ + subRL[11] = krr; + ROLDQ(krl, krr, 30); + /* k15 */ + subRL[20] = krl; + /* k16 */ + subRL[21] = krr; + ROLDQ(krl, krr, 51); + /* kw3 */ + subRL[32] = krl; + /* kw4 */ + subRL[33] = krr; + + camellia_setup_tail(subkey, subRL, 32); +} + +static void camellia_setup192(const unsigned char *key, u64 *subkey) +{ + unsigned char kk[32]; + u64 krl, krr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krl, key+16, 8); + krr = ~krl; + memcpy(kk+24, (unsigned char *)&krr, 8); + camellia_setup256(kk, subkey); +} + +int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, + unsigned int key_len) +{ + if (key_len != 16 && key_len != 24 && key_len != 32) + return -EINVAL; + + cctx->key_length = key_len; + + switch (key_len) { + case 16: + camellia_setup128(key, cctx->key_table); + break; + case 24: + camellia_setup192(key, cctx->key_table); + break; + case 32: + camellia_setup256(key, cctx->key_table); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(__camellia_setkey); + +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len); +} + +static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return camellia_setkey(&tfm->base, key, key_len); +} + +void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src) +{ + u8 buf[CAMELLIA_BLOCK_SIZE]; + const u8 *iv = src; + + if (dst == src) + iv = memcpy(buf, iv, sizeof(buf)); + camellia_dec_blk_2way(ctx, dst, src); + crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE); +} +EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); +} + +static struct crypto_alg camellia_cipher_alg = { + .cra_name = "camellia", + .cra_driver_name = "camellia-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_setkey, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}; + +static struct skcipher_alg camellia_skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } +}; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, camellia-asm is slower than original assembler + * implementation because excessive uses of 64bit rotate and + * left-shifts (which are really slow on P4) needed to store and + * handle 128bit block in two 64bit registers. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init camellia_init(void) +{ + int err; + + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "camellia-x86_64: performance on this CPU " + "would be suboptimal: disabling " + "camellia-x86_64.\n"); + return -ENODEV; + } + + err = crypto_register_alg(&camellia_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); + if (err) + crypto_unregister_alg(&camellia_cipher_alg); + + return err; +} + +static void __exit camellia_fini(void) +{ + crypto_unregister_alg(&camellia_cipher_alg); + crypto_unregister_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); +} + +module_init(camellia_init); +module_exit(camellia_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S new file mode 100644 index 000000000..b258af420 --- /dev/null +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -0,0 +1,563 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2012 Jussi Kivilinna + */ + +#include +#include + +.file "cast5-avx-x86_64-asm_64.S" + +.extern cast_s1 +.extern cast_s2 +.extern cast_s3 +.extern cast_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (16*4) +#define rr ((16*4)+16) + +/* s-boxes */ +#define s1 cast_s1 +#define s2 cast_s2 +#define s3 cast_s3 +#define s4 cast_s4 + +/********************************************************************** + 16-way AVX cast5 + **********************************************************************/ +#define CTX %r15 + +#define RL1 %xmm0 +#define RR1 %xmm1 +#define RL2 %xmm2 +#define RR2 %xmm3 +#define RL3 %xmm4 +#define RR3 %xmm5 +#define RL4 %xmm6 +#define RR4 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 + +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rdi +#define RID1d %edi +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define subround(a1, b1, a2, b2, f) \ + F ## f ## _2(a1, b1, a2, b2); + +#define round(l, r, n, f) \ + vbroadcastss (km+(4*n))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; \ + subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \ + subround(l ## 3, r ## 3, l ## 4, r ## 4, f); + +#define enc_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; + +#define dec_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; \ + vpshufb .Lbswap128_mask, RKR, RKR; + +#define transpose_2x4(x0, x1, t0, t1) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t1; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; + +#define inpack_blocks(x0, x1, t0, t1, rmask) \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + \ + transpose_2x4(x0, x1, t0, t1) + +#define outunpack_blocks(x0, x1, t0, t1, rmask) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; + +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16 +.align 16 +.Lbswap_iv_mask: + .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.16_mask, "aM", @progbits, 4 +.align 4 +.L16_mask: + .byte 16, 16, 16, 16 +.section .rodata.cst4.32_mask, "aM", @progbits, 4 +.align 4 +.L32_mask: + .byte 32, 0, 0, 0 +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 16 +SYM_FUNC_START_LOCAL(__cast5_enc_blk16) + /* input: + * %rdi: ctx + * RL1: blocks 1 and 2 + * RR1: blocks 3 and 4 + * RL2: blocks 5 and 6 + * RR2: blocks 7 and 8 + * RL3: blocks 9 and 10 + * RR3: blocks 11 and 12 + * RL4: blocks 13 and 14 + * RR4: blocks 15 and 16 + * output: + * RL1: encrypted blocks 1 and 2 + * RR1: encrypted blocks 3 and 4 + * RL2: encrypted blocks 5 and 6 + * RR2: encrypted blocks 7 and 8 + * RL3: encrypted blocks 9 and 10 + * RR3: encrypted blocks 11 and 12 + * RL4: encrypted blocks 13 and 14 + * RR4: encrypted blocks 15 and 16 + */ + + pushq %r15; + pushq %rbx; + + movq %rdi, CTX; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + enc_preload_rkr(); + + inpack_blocks(RL1, RR1, RTMP, RX, RKM); + inpack_blocks(RL2, RR2, RTMP, RX, RKM); + inpack_blocks(RL3, RR3, RTMP, RX, RKM); + inpack_blocks(RL4, RR4, RTMP, RX, RKM); + + round(RL, RR, 0, 1); + round(RR, RL, 1, 2); + round(RL, RR, 2, 3); + round(RR, RL, 3, 1); + round(RL, RR, 4, 2); + round(RR, RL, 5, 3); + round(RL, RR, 6, 1); + round(RR, RL, 7, 2); + round(RL, RR, 8, 3); + round(RR, RL, 9, 1); + round(RL, RR, 10, 2); + round(RR, RL, 11, 3); + + movzbl rr(CTX), %eax; + testl %eax, %eax; + jnz .L__skip_enc; + + round(RL, RR, 12, 1); + round(RR, RL, 13, 2); + round(RL, RR, 14, 3); + round(RR, RL, 15, 1); + +.L__skip_enc: + popq %rbx; + popq %r15; + + vmovdqa .Lbswap_mask, RKM; + + outunpack_blocks(RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(RR2, RL2, RTMP, RX, RKM); + outunpack_blocks(RR3, RL3, RTMP, RX, RKM); + outunpack_blocks(RR4, RL4, RTMP, RX, RKM); + + RET; +SYM_FUNC_END(__cast5_enc_blk16) + +.align 16 +SYM_FUNC_START_LOCAL(__cast5_dec_blk16) + /* input: + * %rdi: ctx + * RL1: encrypted blocks 1 and 2 + * RR1: encrypted blocks 3 and 4 + * RL2: encrypted blocks 5 and 6 + * RR2: encrypted blocks 7 and 8 + * RL3: encrypted blocks 9 and 10 + * RR3: encrypted blocks 11 and 12 + * RL4: encrypted blocks 13 and 14 + * RR4: encrypted blocks 15 and 16 + * output: + * RL1: decrypted blocks 1 and 2 + * RR1: decrypted blocks 3 and 4 + * RL2: decrypted blocks 5 and 6 + * RR2: decrypted blocks 7 and 8 + * RL3: decrypted blocks 9 and 10 + * RR3: decrypted blocks 11 and 12 + * RL4: decrypted blocks 13 and 14 + * RR4: decrypted blocks 15 and 16 + */ + + pushq %r15; + pushq %rbx; + + movq %rdi, CTX; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + dec_preload_rkr(); + + inpack_blocks(RL1, RR1, RTMP, RX, RKM); + inpack_blocks(RL2, RR2, RTMP, RX, RKM); + inpack_blocks(RL3, RR3, RTMP, RX, RKM); + inpack_blocks(RL4, RR4, RTMP, RX, RKM); + + movzbl rr(CTX), %eax; + testl %eax, %eax; + jnz .L__skip_dec; + + round(RL, RR, 15, 1); + round(RR, RL, 14, 3); + round(RL, RR, 13, 2); + round(RR, RL, 12, 1); + +.L__dec_tail: + round(RL, RR, 11, 3); + round(RR, RL, 10, 2); + round(RL, RR, 9, 1); + round(RR, RL, 8, 3); + round(RL, RR, 7, 2); + round(RR, RL, 6, 1); + round(RL, RR, 5, 3); + round(RR, RL, 4, 2); + round(RL, RR, 3, 1); + round(RR, RL, 2, 3); + round(RL, RR, 1, 2); + round(RR, RL, 0, 1); + + vmovdqa .Lbswap_mask, RKM; + popq %rbx; + popq %r15; + + outunpack_blocks(RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(RR2, RL2, RTMP, RX, RKM); + outunpack_blocks(RR3, RL3, RTMP, RX, RKM); + outunpack_blocks(RR4, RL4, RTMP, RX, RKM); + + RET; + +.L__skip_dec: + vpsrldq $4, RKR, RKR; + jmp .L__dec_tail; +SYM_FUNC_END(__cast5_dec_blk16) + +SYM_FUNC_START(cast5_ecb_enc_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + + vmovdqu (0*4*4)(%rdx), RL1; + vmovdqu (1*4*4)(%rdx), RR1; + vmovdqu (2*4*4)(%rdx), RL2; + vmovdqu (3*4*4)(%rdx), RR2; + vmovdqu (4*4*4)(%rdx), RL3; + vmovdqu (5*4*4)(%rdx), RR3; + vmovdqu (6*4*4)(%rdx), RL4; + vmovdqu (7*4*4)(%rdx), RR4; + + call __cast5_enc_blk16; + + vmovdqu RR1, (0*4*4)(%r11); + vmovdqu RL1, (1*4*4)(%r11); + vmovdqu RR2, (2*4*4)(%r11); + vmovdqu RL2, (3*4*4)(%r11); + vmovdqu RR3, (4*4*4)(%r11); + vmovdqu RL3, (5*4*4)(%r11); + vmovdqu RR4, (6*4*4)(%r11); + vmovdqu RL4, (7*4*4)(%r11); + + popq %r15; + FRAME_END + RET; +SYM_FUNC_END(cast5_ecb_enc_16way) + +SYM_FUNC_START(cast5_ecb_dec_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + + vmovdqu (0*4*4)(%rdx), RL1; + vmovdqu (1*4*4)(%rdx), RR1; + vmovdqu (2*4*4)(%rdx), RL2; + vmovdqu (3*4*4)(%rdx), RR2; + vmovdqu (4*4*4)(%rdx), RL3; + vmovdqu (5*4*4)(%rdx), RR3; + vmovdqu (6*4*4)(%rdx), RL4; + vmovdqu (7*4*4)(%rdx), RR4; + + call __cast5_dec_blk16; + + vmovdqu RR1, (0*4*4)(%r11); + vmovdqu RL1, (1*4*4)(%r11); + vmovdqu RR2, (2*4*4)(%r11); + vmovdqu RL2, (3*4*4)(%r11); + vmovdqu RR3, (4*4*4)(%r11); + vmovdqu RL3, (5*4*4)(%r11); + vmovdqu RR4, (6*4*4)(%r11); + vmovdqu RL4, (7*4*4)(%r11); + + popq %r15; + FRAME_END + RET; +SYM_FUNC_END(cast5_ecb_dec_16way) + +SYM_FUNC_START(cast5_cbc_dec_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + pushq %r12; + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + movq %rdx, %r12; + + vmovdqu (0*16)(%rdx), RL1; + vmovdqu (1*16)(%rdx), RR1; + vmovdqu (2*16)(%rdx), RL2; + vmovdqu (3*16)(%rdx), RR2; + vmovdqu (4*16)(%rdx), RL3; + vmovdqu (5*16)(%rdx), RR3; + vmovdqu (6*16)(%rdx), RL4; + vmovdqu (7*16)(%rdx), RR4; + + call __cast5_dec_blk16; + + /* xor with src */ + vmovq (%r12), RX; + vpshufd $0x4f, RX, RX; + vpxor RX, RR1, RR1; + vpxor 0*16+8(%r12), RL1, RL1; + vpxor 1*16+8(%r12), RR2, RR2; + vpxor 2*16+8(%r12), RL2, RL2; + vpxor 3*16+8(%r12), RR3, RR3; + vpxor 4*16+8(%r12), RL3, RL3; + vpxor 5*16+8(%r12), RR4, RR4; + vpxor 6*16+8(%r12), RL4, RL4; + + vmovdqu RR1, (0*16)(%r11); + vmovdqu RL1, (1*16)(%r11); + vmovdqu RR2, (2*16)(%r11); + vmovdqu RL2, (3*16)(%r11); + vmovdqu RR3, (4*16)(%r11); + vmovdqu RL3, (5*16)(%r11); + vmovdqu RR4, (6*16)(%r11); + vmovdqu RL4, (7*16)(%r11); + + popq %r15; + popq %r12; + FRAME_END + RET; +SYM_FUNC_END(cast5_cbc_dec_16way) + +SYM_FUNC_START(cast5_ctr_16way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: iv (big endian, 64bit) + */ + FRAME_BEGIN + pushq %r12; + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + movq %rdx, %r12; + + vpcmpeqd RTMP, RTMP, RTMP; + vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */ + + vpcmpeqd RKR, RKR, RKR; + vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */ + vmovdqa .Lbswap_iv_mask, R1ST; + vmovdqa .Lbswap128_mask, RKM; + + /* load IV and byteswap */ + vmovq (%rcx), RX; + vpshufb R1ST, RX, RX; + + /* construct IVs */ + vpsubq RTMP, RX, RX; /* le: IV1, IV0 */ + vpshufb RKM, RX, RL1; /* be: IV0, IV1 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RR1; /* be: IV2, IV3 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RL2; /* be: IV4, IV5 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RR2; /* be: IV6, IV7 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RL3; /* be: IV8, IV9 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RR3; /* be: IV10, IV11 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RL4; /* be: IV12, IV13 */ + vpsubq RKR, RX, RX; + vpshufb RKM, RX, RR4; /* be: IV14, IV15 */ + + /* store last IV */ + vpsubq RTMP, RX, RX; /* le: IV16, IV14 */ + vpshufb R1ST, RX, RX; /* be: IV16, IV16 */ + vmovq RX, (%rcx); + + call __cast5_enc_blk16; + + /* dst = src ^ iv */ + vpxor (0*16)(%r12), RR1, RR1; + vpxor (1*16)(%r12), RL1, RL1; + vpxor (2*16)(%r12), RR2, RR2; + vpxor (3*16)(%r12), RL2, RL2; + vpxor (4*16)(%r12), RR3, RR3; + vpxor (5*16)(%r12), RL3, RL3; + vpxor (6*16)(%r12), RR4, RR4; + vpxor (7*16)(%r12), RL4, RL4; + vmovdqu RR1, (0*16)(%r11); + vmovdqu RL1, (1*16)(%r11); + vmovdqu RR2, (2*16)(%r11); + vmovdqu RL2, (3*16)(%r11); + vmovdqu RR3, (4*16)(%r11); + vmovdqu RL3, (5*16)(%r11); + vmovdqu RR4, (6*16)(%r11); + vmovdqu RL4, (7*16)(%r11); + + popq %r15; + popq %r12; + FRAME_END + RET; +SYM_FUNC_END(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c new file mode 100644 index 000000000..3976a87f9 --- /dev/null +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for the AVX assembler implementation of the Cast5 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ecb_cbc_helpers.h" + +#define CAST5_PARALLEL_BLOCKS 16 + +asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); + +static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return cast5_setkey(&tfm->base, key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_enc_16way); + ECB_BLOCK(1, __cast5_encrypt); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_dec_16way); + ECB_BLOCK(1, __cast5_decrypt); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAST5_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast5_encrypt); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_cbc_dec_16way); + CBC_DEC_BLOCK(1, __cast5_decrypt); + CBC_WALK_END(); +} + +static struct skcipher_alg cast5_algs[] = { + { + .base.cra_name = "__ecb(cast5)", + .base.cra_driver_name = "__ecb-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast5)", + .base.cra_driver_name = "__cbc-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } +}; + +static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)]; + +static int __init cast5_init(void) +{ + const char *feature_name; + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(cast5_algs, + ARRAY_SIZE(cast5_algs), + cast5_simd_algs); +} + +static void __exit cast5_exit(void) +{ + simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs), + cast5_simd_algs); +} + +module_init(cast5_init); +module_exit(cast5_exit); + +MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("cast5"); diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S new file mode 100644 index 000000000..82b716fd5 --- /dev/null +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#include +#include +#include "glue_helper-asm-avx.S" + +.file "cast6-avx-x86_64-asm_64.S" + +.extern cast_s1 +.extern cast_s2 +.extern cast_s3 +.extern cast_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (12*4*4) + +/* s-boxes */ +#define s1 cast_s1 +#define s2 cast_s2 +#define s3 cast_s3 +#define s4 cast_s4 + +/********************************************************************** + 8-way AVX cast6 + **********************************************************************/ +#define CTX %r15 + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rdi +#define RID1d %edi +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define qop(in, out, f) \ + F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2); + +#define get_round_keys(nn) \ + vbroadcastss (km+(4*(nn)))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; + +#define Q(n) \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); + +#define QBAR(n) \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); \ + \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); + +#define shuffle(mask) \ + vpshufb mask, RKR, RKR; + +#define preload_rkr(n, do_mask, mask) \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor (kr+n*16)(CTX), RKR, RKR; \ + do_mask(mask); + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.Lrkr_enc_Q_Q_QBAR_QBAR: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_QBAR_QBAR_QBAR_QBAR: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_dec_Q_Q_Q_Q: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +.Lrkr_dec_Q_Q_QBAR_QBAR: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 +.Lrkr_dec_QBAR_QBAR_QBAR_QBAR: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.L16_mask, "aM", @progbits, 4 +.align 4 +.L16_mask: + .byte 16, 16, 16, 16 + +.section .rodata.cst4.L32_mask, "aM", @progbits, 4 +.align 4 +.L32_mask: + .byte 32, 0, 0, 0 + +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 8 +SYM_FUNC_START_LOCAL(__cast6_enc_blk8) + /* input: + * %rdi: ctx + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks + */ + + pushq %r15; + pushq %rbx; + + movq %rdi, CTX; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + + inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + preload_rkr(0, dummy, none); + Q(0); + Q(1); + Q(2); + Q(3); + preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR); + Q(4); + Q(5); + QBAR(6); + QBAR(7); + preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR); + QBAR(8); + QBAR(9); + QBAR(10); + QBAR(11); + + popq %rbx; + popq %r15; + + vmovdqa .Lbswap_mask, RKM; + + outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + RET; +SYM_FUNC_END(__cast6_enc_blk8) + +.align 8 +SYM_FUNC_START_LOCAL(__cast6_dec_blk8) + /* input: + * %rdi: ctx + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks + */ + + pushq %r15; + pushq %rbx; + + movq %rdi, CTX; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + + inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q); + Q(11); + Q(10); + Q(9); + Q(8); + preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR); + Q(7); + Q(6); + QBAR(5); + QBAR(4); + preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR); + QBAR(3); + QBAR(2); + QBAR(1); + QBAR(0); + + popq %rbx; + popq %r15; + + vmovdqa .Lbswap_mask, RKM; + outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + RET; +SYM_FUNC_END(__cast6_dec_blk8) + +SYM_FUNC_START(cast6_ecb_enc_8way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __cast6_enc_blk8; + + store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + popq %r15; + FRAME_END + RET; +SYM_FUNC_END(cast6_ecb_enc_8way) + +SYM_FUNC_START(cast6_ecb_dec_8way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __cast6_dec_blk8; + + store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + popq %r15; + FRAME_END + RET; +SYM_FUNC_END(cast6_ecb_dec_8way) + +SYM_FUNC_START(cast6_cbc_dec_8way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + pushq %r12; + pushq %r15; + + movq %rdi, CTX; + movq %rsi, %r11; + movq %rdx, %r12; + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __cast6_dec_blk8; + + store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + popq %r15; + popq %r12; + FRAME_END + RET; +SYM_FUNC_END(cast6_cbc_dec_8way) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c new file mode 100644 index 000000000..7e2aea372 --- /dev/null +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for the AVX assembler implementation of the Cast6 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ecb_cbc_helpers.h" + +#define CAST6_PARALLEL_BLOCKS 8 + +asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return cast6_setkey(&tfm->base, key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_enc_8way); + ECB_BLOCK(1, __cast6_encrypt); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_dec_8way); + ECB_BLOCK(1, __cast6_decrypt); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAST6_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast6_encrypt); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_cbc_dec_8way); + CBC_DEC_BLOCK(1, __cast6_decrypt); + CBC_WALK_END(); +} + +static struct skcipher_alg cast6_algs[] = { + { + .base.cra_name = "__ecb(cast6)", + .base.cra_driver_name = "__ecb-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast6)", + .base.cra_driver_name = "__cbc-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)]; + +static int __init cast6_init(void) +{ + const char *feature_name; + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(cast6_algs, + ARRAY_SIZE(cast6_algs), + cast6_simd_algs); +} + +static void __exit cast6_exit(void) +{ + simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs), + cast6_simd_algs); +} + +module_init(cast6_init); +module_exit(cast6_exit); + +MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("cast6"); diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S new file mode 100644 index 000000000..f3d8fc018 --- /dev/null +++ b/arch/x86/crypto/chacha-avx2-x86_64.S @@ -0,0 +1,1021 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ChaCha 256-bit cipher algorithm, x64 AVX2 functions + * + * Copyright (C) 2015 Martin Willi + */ + +#include + +.section .rodata.cst32.ROT8, "aM", @progbits, 32 +.align 32 +ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 + .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 +ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 + .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section .rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 +CTRINC: .octa 0x00000003000000020000000100000000 + .octa 0x00000007000000060000000500000004 + +.section .rodata.cst32.CTR2BL, "aM", @progbits, 32 +.align 32 +CTR2BL: .octa 0x00000000000000000000000000000000 + .octa 0x00000000000000000000000000000001 + +.section .rodata.cst32.CTR4BL, "aM", @progbits, 32 +.align 32 +CTR4BL: .octa 0x00000000000000000000000000000002 + .octa 0x00000000000000000000000000000003 + +.text + +SYM_FUNC_START(chacha_2block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 2 data blocks output, o + # %rdx: up to 2 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts two ChaCha blocks by loading the state + # matrix twice across four AVX registers. It performs matrix operations + # on four words in each matrix in parallel, but requires shuffling to + # rearrange the words after each round. + + vzeroupper + + # x0..3[0-2] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + + vmovdqa %ymm0,%ymm8 + vmovdqa %ymm1,%ymm9 + vmovdqa %ymm2,%ymm10 + vmovdqa %ymm3,%ymm11 + + vmovdqa ROT8(%rip),%ymm4 + vmovdqa ROT16(%rip),%ymm5 + + mov %rcx,%rax + +.Ldoubleround: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm5,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm6 + vpslld $12,%ymm6,%ymm6 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm6,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm4,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm7 + vpslld $7,%ymm7,%ymm7 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm5,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm6 + vpslld $12,%ymm6,%ymm6 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm6,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm4,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm7 + vpslld $7,%ymm7,%ymm7 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + + sub $2,%r8d + jnz .Ldoubleround + + # o0 = i0 ^ (x0 + s0) + vpaddd %ymm8,%ymm0,%ymm7 + cmp $0x10,%rax + jl .Lxorpart2 + vpxor 0x00(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x00(%rsi) + vextracti128 $1,%ymm7,%xmm0 + # o1 = i1 ^ (x1 + s1) + vpaddd %ymm9,%ymm1,%ymm7 + cmp $0x20,%rax + jl .Lxorpart2 + vpxor 0x10(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x10(%rsi) + vextracti128 $1,%ymm7,%xmm1 + # o2 = i2 ^ (x2 + s2) + vpaddd %ymm10,%ymm2,%ymm7 + cmp $0x30,%rax + jl .Lxorpart2 + vpxor 0x20(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x20(%rsi) + vextracti128 $1,%ymm7,%xmm2 + # o3 = i3 ^ (x3 + s3) + vpaddd %ymm11,%ymm3,%ymm7 + cmp $0x40,%rax + jl .Lxorpart2 + vpxor 0x30(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x30(%rsi) + vextracti128 $1,%ymm7,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm7 + cmp $0x50,%rax + jl .Lxorpart2 + vpxor 0x40(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x40(%rsi) + + vmovdqa %xmm1,%xmm7 + cmp $0x60,%rax + jl .Lxorpart2 + vpxor 0x50(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x50(%rsi) + + vmovdqa %xmm2,%xmm7 + cmp $0x70,%rax + jl .Lxorpart2 + vpxor 0x60(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x60(%rsi) + + vmovdqa %xmm3,%xmm7 + cmp $0x80,%rax + jl .Lxorpart2 + vpxor 0x70(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x70(%rsi) + +.Ldone2: + vzeroupper + RET + +.Lxorpart2: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone2 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%xmm7,%xmm7 + vmovdqa %xmm7,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone2 + +SYM_FUNC_END(chacha_2block_xor_avx2) + +SYM_FUNC_START(chacha_4block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts four ChaCha blocks by loading the state + # matrix four times across eight AVX registers. It performs matrix + # operations on four words in two matrices in parallel, sequentially + # to the operations on the four words of the other two matrices. The + # required word shuffling has a rather high latency, we can do the + # arithmetic on two matrix-pairs without much slowdown. + + vzeroupper + + # x0..3[0-4] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vmovdqa %ymm0,%ymm4 + vmovdqa %ymm1,%ymm5 + vmovdqa %ymm2,%ymm6 + vmovdqa %ymm3,%ymm7 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + vpaddd CTR4BL(%rip),%ymm7,%ymm7 + + vmovdqa %ymm0,%ymm11 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm3,%ymm14 + vmovdqa %ymm7,%ymm15 + + vmovdqa ROT8(%rip),%ymm8 + vmovdqa ROT16(%rip),%ymm9 + + mov %rcx,%rax + +.Ldoubleround4: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm9,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm9,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + vpshufd $0x39,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + vpshufd $0x93,%ymm7,%ymm7 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm9,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm9,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + vpshufd $0x93,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + vpshufd $0x39,%ymm7,%ymm7 + + sub $2,%r8d + jnz .Ldoubleround4 + + # o0 = i0 ^ (x0 + s0), first block + vpaddd %ymm11,%ymm0,%ymm10 + cmp $0x10,%rax + jl .Lxorpart4 + vpxor 0x00(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x00(%rsi) + vextracti128 $1,%ymm10,%xmm0 + # o1 = i1 ^ (x1 + s1), first block + vpaddd %ymm12,%ymm1,%ymm10 + cmp $0x20,%rax + jl .Lxorpart4 + vpxor 0x10(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x10(%rsi) + vextracti128 $1,%ymm10,%xmm1 + # o2 = i2 ^ (x2 + s2), first block + vpaddd %ymm13,%ymm2,%ymm10 + cmp $0x30,%rax + jl .Lxorpart4 + vpxor 0x20(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x20(%rsi) + vextracti128 $1,%ymm10,%xmm2 + # o3 = i3 ^ (x3 + s3), first block + vpaddd %ymm14,%ymm3,%ymm10 + cmp $0x40,%rax + jl .Lxorpart4 + vpxor 0x30(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x30(%rsi) + vextracti128 $1,%ymm10,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm10 + cmp $0x50,%rax + jl .Lxorpart4 + vpxor 0x40(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x40(%rsi) + + vmovdqa %xmm1,%xmm10 + cmp $0x60,%rax + jl .Lxorpart4 + vpxor 0x50(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x50(%rsi) + + vmovdqa %xmm2,%xmm10 + cmp $0x70,%rax + jl .Lxorpart4 + vpxor 0x60(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x60(%rsi) + + vmovdqa %xmm3,%xmm10 + cmp $0x80,%rax + jl .Lxorpart4 + vpxor 0x70(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x70(%rsi) + + # o0 = i0 ^ (x0 + s0), third block + vpaddd %ymm11,%ymm4,%ymm10 + cmp $0x90,%rax + jl .Lxorpart4 + vpxor 0x80(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x80(%rsi) + vextracti128 $1,%ymm10,%xmm4 + # o1 = i1 ^ (x1 + s1), third block + vpaddd %ymm12,%ymm5,%ymm10 + cmp $0xa0,%rax + jl .Lxorpart4 + vpxor 0x90(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x90(%rsi) + vextracti128 $1,%ymm10,%xmm5 + # o2 = i2 ^ (x2 + s2), third block + vpaddd %ymm13,%ymm6,%ymm10 + cmp $0xb0,%rax + jl .Lxorpart4 + vpxor 0xa0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xa0(%rsi) + vextracti128 $1,%ymm10,%xmm6 + # o3 = i3 ^ (x3 + s3), third block + vpaddd %ymm15,%ymm7,%ymm10 + cmp $0xc0,%rax + jl .Lxorpart4 + vpxor 0xb0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xb0(%rsi) + vextracti128 $1,%ymm10,%xmm7 + + # xor and write fourth block + vmovdqa %xmm4,%xmm10 + cmp $0xd0,%rax + jl .Lxorpart4 + vpxor 0xc0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xc0(%rsi) + + vmovdqa %xmm5,%xmm10 + cmp $0xe0,%rax + jl .Lxorpart4 + vpxor 0xd0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xd0(%rsi) + + vmovdqa %xmm6,%xmm10 + cmp $0xf0,%rax + jl .Lxorpart4 + vpxor 0xe0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xe0(%rsi) + + vmovdqa %xmm7,%xmm10 + cmp $0x100,%rax + jl .Lxorpart4 + vpxor 0xf0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xf0(%rsi) + +.Ldone4: + vzeroupper + RET + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone4 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%xmm10,%xmm10 + vmovdqa %xmm10,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone4 + +SYM_FUNC_END(chacha_4block_xor_avx2) + +SYM_FUNC_START(chacha_8block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 8 data blocks output, o + # %rdx: up to 8 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts eight consecutive ChaCha blocks by loading + # the state matrix in AVX registers eight times. As we need some + # scratch registers, we save the first four registers on the stack. The + # algorithm performs each operation on the corresponding word of each + # state matrix, hence requires no word shuffling. For final XORing step + # we transpose the matrix by interleaving 32-, 64- and then 128-bit + # words, which allows us to do XOR in AVX registers. 8/16-bit word + # rotation is done with the slightly better performing byte shuffling, + # 7/12-bit word rotation uses traditional shift+OR. + + vzeroupper + # 4 * 32 byte stack, 32-byte aligned + lea 8(%rsp),%r10 + and $~31, %rsp + sub $0x80, %rsp + mov %rcx,%rax + + # x0..15[0-7] = s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpbroadcastd 0x04(%rdi),%ymm1 + vpbroadcastd 0x08(%rdi),%ymm2 + vpbroadcastd 0x0c(%rdi),%ymm3 + vpbroadcastd 0x10(%rdi),%ymm4 + vpbroadcastd 0x14(%rdi),%ymm5 + vpbroadcastd 0x18(%rdi),%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm7 + vpbroadcastd 0x20(%rdi),%ymm8 + vpbroadcastd 0x24(%rdi),%ymm9 + vpbroadcastd 0x28(%rdi),%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm11 + vpbroadcastd 0x30(%rdi),%ymm12 + vpbroadcastd 0x34(%rdi),%ymm13 + vpbroadcastd 0x38(%rdi),%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm15 + # x0..3 on stack + vmovdqa %ymm0,0x00(%rsp) + vmovdqa %ymm1,0x20(%rsp) + vmovdqa %ymm2,0x40(%rsp) + vmovdqa %ymm3,0x60(%rsp) + + vmovdqa CTRINC(%rip),%ymm1 + vmovdqa ROT8(%rip),%ymm2 + vmovdqa ROT16(%rip),%ymm3 + + # x12 += counter values 0-3 + vpaddd %ymm1,%ymm12,%ymm12 + +.Ldoubleround8: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + vpaddd 0x00(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm3,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + vpaddd 0x20(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm3,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + vpaddd 0x40(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm3,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vpaddd 0x60(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm3,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $12,%ymm4,%ymm0 + vpsrld $20,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $12,%ymm5,%ymm0 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $12,%ymm6,%ymm0 + vpsrld $20,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm11,%ymm7,%ymm7 + vpslld $12,%ymm7,%ymm0 + vpsrld $20,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + vpaddd 0x00(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm2,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + vpaddd 0x20(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm2,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + vpaddd 0x40(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm2,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vpaddd 0x60(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm2,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm0 + vpsrld $25,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm0 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm0 + vpsrld $25,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm11,%ymm7,%ymm7 + vpslld $7,%ymm7,%ymm0 + vpsrld $25,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + vpaddd 0x00(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm3,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0 + vpaddd 0x20(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm3,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + vpaddd 0x40(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm3,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vpaddd 0x60(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm3,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + vpaddd %ymm15,%ymm10,%ymm10 + vpxor %ymm10,%ymm5,%ymm5 + vpslld $12,%ymm5,%ymm0 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + vpaddd %ymm12,%ymm11,%ymm11 + vpxor %ymm11,%ymm6,%ymm6 + vpslld $12,%ymm6,%ymm0 + vpsrld $20,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + vpaddd %ymm13,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpslld $12,%ymm7,%ymm0 + vpsrld $20,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm9,%ymm4,%ymm4 + vpslld $12,%ymm4,%ymm0 + vpsrld $20,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + vpaddd 0x00(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm2,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + vpaddd 0x20(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm2,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + vpaddd 0x40(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm2,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vpaddd 0x60(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm2,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + vpaddd %ymm15,%ymm10,%ymm10 + vpxor %ymm10,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm0 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + vpaddd %ymm12,%ymm11,%ymm11 + vpxor %ymm11,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm0 + vpsrld $25,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + vpaddd %ymm13,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpslld $7,%ymm7,%ymm0 + vpsrld $25,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm9,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm0 + vpsrld $25,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + + sub $2,%r8d + jnz .Ldoubleround8 + + # x0..15[0-3] += s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpaddd 0x00(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpbroadcastd 0x04(%rdi),%ymm0 + vpaddd 0x20(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpbroadcastd 0x08(%rdi),%ymm0 + vpaddd 0x40(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpbroadcastd 0x0c(%rdi),%ymm0 + vpaddd 0x60(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpbroadcastd 0x10(%rdi),%ymm0 + vpaddd %ymm0,%ymm4,%ymm4 + vpbroadcastd 0x14(%rdi),%ymm0 + vpaddd %ymm0,%ymm5,%ymm5 + vpbroadcastd 0x18(%rdi),%ymm0 + vpaddd %ymm0,%ymm6,%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm0 + vpaddd %ymm0,%ymm7,%ymm7 + vpbroadcastd 0x20(%rdi),%ymm0 + vpaddd %ymm0,%ymm8,%ymm8 + vpbroadcastd 0x24(%rdi),%ymm0 + vpaddd %ymm0,%ymm9,%ymm9 + vpbroadcastd 0x28(%rdi),%ymm0 + vpaddd %ymm0,%ymm10,%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm0 + vpaddd %ymm0,%ymm11,%ymm11 + vpbroadcastd 0x30(%rdi),%ymm0 + vpaddd %ymm0,%ymm12,%ymm12 + vpbroadcastd 0x34(%rdi),%ymm0 + vpaddd %ymm0,%ymm13,%ymm13 + vpbroadcastd 0x38(%rdi),%ymm0 + vpaddd %ymm0,%ymm14,%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm0 + vpaddd %ymm0,%ymm15,%ymm15 + + # x12 += counter values 0-3 + vpaddd %ymm1,%ymm12,%ymm12 + + # interleave 32-bit words in state n, n+1 + vmovdqa 0x00(%rsp),%ymm0 + vmovdqa 0x20(%rsp),%ymm1 + vpunpckldq %ymm1,%ymm0,%ymm2 + vpunpckhdq %ymm1,%ymm0,%ymm1 + vmovdqa %ymm2,0x00(%rsp) + vmovdqa %ymm1,0x20(%rsp) + vmovdqa 0x40(%rsp),%ymm0 + vmovdqa 0x60(%rsp),%ymm1 + vpunpckldq %ymm1,%ymm0,%ymm2 + vpunpckhdq %ymm1,%ymm0,%ymm1 + vmovdqa %ymm2,0x40(%rsp) + vmovdqa %ymm1,0x60(%rsp) + vmovdqa %ymm4,%ymm0 + vpunpckldq %ymm5,%ymm0,%ymm4 + vpunpckhdq %ymm5,%ymm0,%ymm5 + vmovdqa %ymm6,%ymm0 + vpunpckldq %ymm7,%ymm0,%ymm6 + vpunpckhdq %ymm7,%ymm0,%ymm7 + vmovdqa %ymm8,%ymm0 + vpunpckldq %ymm9,%ymm0,%ymm8 + vpunpckhdq %ymm9,%ymm0,%ymm9 + vmovdqa %ymm10,%ymm0 + vpunpckldq %ymm11,%ymm0,%ymm10 + vpunpckhdq %ymm11,%ymm0,%ymm11 + vmovdqa %ymm12,%ymm0 + vpunpckldq %ymm13,%ymm0,%ymm12 + vpunpckhdq %ymm13,%ymm0,%ymm13 + vmovdqa %ymm14,%ymm0 + vpunpckldq %ymm15,%ymm0,%ymm14 + vpunpckhdq %ymm15,%ymm0,%ymm15 + + # interleave 64-bit words in state n, n+2 + vmovdqa 0x00(%rsp),%ymm0 + vmovdqa 0x40(%rsp),%ymm2 + vpunpcklqdq %ymm2,%ymm0,%ymm1 + vpunpckhqdq %ymm2,%ymm0,%ymm2 + vmovdqa %ymm1,0x00(%rsp) + vmovdqa %ymm2,0x40(%rsp) + vmovdqa 0x20(%rsp),%ymm0 + vmovdqa 0x60(%rsp),%ymm2 + vpunpcklqdq %ymm2,%ymm0,%ymm1 + vpunpckhqdq %ymm2,%ymm0,%ymm2 + vmovdqa %ymm1,0x20(%rsp) + vmovdqa %ymm2,0x60(%rsp) + vmovdqa %ymm4,%ymm0 + vpunpcklqdq %ymm6,%ymm0,%ymm4 + vpunpckhqdq %ymm6,%ymm0,%ymm6 + vmovdqa %ymm5,%ymm0 + vpunpcklqdq %ymm7,%ymm0,%ymm5 + vpunpckhqdq %ymm7,%ymm0,%ymm7 + vmovdqa %ymm8,%ymm0 + vpunpcklqdq %ymm10,%ymm0,%ymm8 + vpunpckhqdq %ymm10,%ymm0,%ymm10 + vmovdqa %ymm9,%ymm0 + vpunpcklqdq %ymm11,%ymm0,%ymm9 + vpunpckhqdq %ymm11,%ymm0,%ymm11 + vmovdqa %ymm12,%ymm0 + vpunpcklqdq %ymm14,%ymm0,%ymm12 + vpunpckhqdq %ymm14,%ymm0,%ymm14 + vmovdqa %ymm13,%ymm0 + vpunpcklqdq %ymm15,%ymm0,%ymm13 + vpunpckhqdq %ymm15,%ymm0,%ymm15 + + # interleave 128-bit words in state n, n+4 + # xor/write first four blocks + vmovdqa 0x00(%rsp),%ymm1 + vperm2i128 $0x20,%ymm4,%ymm1,%ymm0 + cmp $0x0020,%rax + jl .Lxorpart8 + vpxor 0x0000(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0000(%rsi) + vperm2i128 $0x31,%ymm4,%ymm1,%ymm4 + + vperm2i128 $0x20,%ymm12,%ymm8,%ymm0 + cmp $0x0040,%rax + jl .Lxorpart8 + vpxor 0x0020(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0020(%rsi) + vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 + + vmovdqa 0x40(%rsp),%ymm1 + vperm2i128 $0x20,%ymm6,%ymm1,%ymm0 + cmp $0x0060,%rax + jl .Lxorpart8 + vpxor 0x0040(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0040(%rsi) + vperm2i128 $0x31,%ymm6,%ymm1,%ymm6 + + vperm2i128 $0x20,%ymm14,%ymm10,%ymm0 + cmp $0x0080,%rax + jl .Lxorpart8 + vpxor 0x0060(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0060(%rsi) + vperm2i128 $0x31,%ymm14,%ymm10,%ymm14 + + vmovdqa 0x20(%rsp),%ymm1 + vperm2i128 $0x20,%ymm5,%ymm1,%ymm0 + cmp $0x00a0,%rax + jl .Lxorpart8 + vpxor 0x0080(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0080(%rsi) + vperm2i128 $0x31,%ymm5,%ymm1,%ymm5 + + vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 + cmp $0x00c0,%rax + jl .Lxorpart8 + vpxor 0x00a0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00a0(%rsi) + vperm2i128 $0x31,%ymm13,%ymm9,%ymm13 + + vmovdqa 0x60(%rsp),%ymm1 + vperm2i128 $0x20,%ymm7,%ymm1,%ymm0 + cmp $0x00e0,%rax + jl .Lxorpart8 + vpxor 0x00c0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00c0(%rsi) + vperm2i128 $0x31,%ymm7,%ymm1,%ymm7 + + vperm2i128 $0x20,%ymm15,%ymm11,%ymm0 + cmp $0x0100,%rax + jl .Lxorpart8 + vpxor 0x00e0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00e0(%rsi) + vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 + + # xor remaining blocks, write to output + vmovdqa %ymm4,%ymm0 + cmp $0x0120,%rax + jl .Lxorpart8 + vpxor 0x0100(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0100(%rsi) + + vmovdqa %ymm12,%ymm0 + cmp $0x0140,%rax + jl .Lxorpart8 + vpxor 0x0120(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0120(%rsi) + + vmovdqa %ymm6,%ymm0 + cmp $0x0160,%rax + jl .Lxorpart8 + vpxor 0x0140(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0140(%rsi) + + vmovdqa %ymm14,%ymm0 + cmp $0x0180,%rax + jl .Lxorpart8 + vpxor 0x0160(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0160(%rsi) + + vmovdqa %ymm5,%ymm0 + cmp $0x01a0,%rax + jl .Lxorpart8 + vpxor 0x0180(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0180(%rsi) + + vmovdqa %ymm13,%ymm0 + cmp $0x01c0,%rax + jl .Lxorpart8 + vpxor 0x01a0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01a0(%rsi) + + vmovdqa %ymm7,%ymm0 + cmp $0x01e0,%rax + jl .Lxorpart8 + vpxor 0x01c0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01c0(%rsi) + + vmovdqa %ymm15,%ymm0 + cmp $0x0200,%rax + jl .Lxorpart8 + vpxor 0x01e0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01e0(%rsi) + +.Ldone8: + vzeroupper + lea -8(%r10),%rsp + RET + +.Lxorpart8: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x1f,%r9 + jz .Ldone8 + and $~0x1f,%rax + + mov %rsi,%r11 + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + jmp .Ldone8 + +SYM_FUNC_END(chacha_8block_xor_avx2) diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S new file mode 100644 index 000000000..259383e1a --- /dev/null +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ChaCha 256-bit cipher algorithm, x64 AVX-512VL functions + * + * Copyright (C) 2018 Martin Willi + */ + +#include + +.section .rodata.cst32.CTR2BL, "aM", @progbits, 32 +.align 32 +CTR2BL: .octa 0x00000000000000000000000000000000 + .octa 0x00000000000000000000000000000001 + +.section .rodata.cst32.CTR4BL, "aM", @progbits, 32 +.align 32 +CTR4BL: .octa 0x00000000000000000000000000000002 + .octa 0x00000000000000000000000000000003 + +.section .rodata.cst32.CTR8BL, "aM", @progbits, 32 +.align 32 +CTR8BL: .octa 0x00000003000000020000000100000000 + .octa 0x00000007000000060000000500000004 + +.text + +SYM_FUNC_START(chacha_2block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 2 data blocks output, o + # %rdx: up to 2 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts two ChaCha blocks by loading the state + # matrix twice across four AVX registers. It performs matrix operations + # on four words in each matrix in parallel, but requires shuffling to + # rearrange the words after each round. + + vzeroupper + + # x0..3[0-2] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + + vmovdqa %ymm0,%ymm8 + vmovdqa %ymm1,%ymm9 + vmovdqa %ymm2,%ymm10 + vmovdqa %ymm3,%ymm11 + +.Ldoubleround: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + + sub $2,%r8d + jnz .Ldoubleround + + # o0 = i0 ^ (x0 + s0) + vpaddd %ymm8,%ymm0,%ymm7 + cmp $0x10,%rcx + jl .Lxorpart2 + vpxord 0x00(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x00(%rsi) + vextracti128 $1,%ymm7,%xmm0 + # o1 = i1 ^ (x1 + s1) + vpaddd %ymm9,%ymm1,%ymm7 + cmp $0x20,%rcx + jl .Lxorpart2 + vpxord 0x10(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x10(%rsi) + vextracti128 $1,%ymm7,%xmm1 + # o2 = i2 ^ (x2 + s2) + vpaddd %ymm10,%ymm2,%ymm7 + cmp $0x30,%rcx + jl .Lxorpart2 + vpxord 0x20(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x20(%rsi) + vextracti128 $1,%ymm7,%xmm2 + # o3 = i3 ^ (x3 + s3) + vpaddd %ymm11,%ymm3,%ymm7 + cmp $0x40,%rcx + jl .Lxorpart2 + vpxord 0x30(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x30(%rsi) + vextracti128 $1,%ymm7,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm7 + cmp $0x50,%rcx + jl .Lxorpart2 + vpxord 0x40(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x40(%rsi) + + vmovdqa %xmm1,%xmm7 + cmp $0x60,%rcx + jl .Lxorpart2 + vpxord 0x50(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x50(%rsi) + + vmovdqa %xmm2,%xmm7 + cmp $0x70,%rcx + jl .Lxorpart2 + vpxord 0x60(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x60(%rsi) + + vmovdqa %xmm3,%xmm7 + cmp $0x80,%rcx + jl .Lxorpart2 + vpxord 0x70(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x70(%rsi) + +.Ldone2: + vzeroupper + RET + +.Lxorpart2: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx + jz .Ldone2 + mov %rax,%r9 + and $~0xf,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z} + vpxord %xmm7,%xmm1,%xmm1 + vmovdqu8 %xmm1,(%rsi,%r9){%k1} + + jmp .Ldone2 + +SYM_FUNC_END(chacha_2block_xor_avx512vl) + +SYM_FUNC_START(chacha_4block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts four ChaCha blocks by loading the state + # matrix four times across eight AVX registers. It performs matrix + # operations on four words in two matrices in parallel, sequentially + # to the operations on the four words of the other two matrices. The + # required word shuffling has a rather high latency, we can do the + # arithmetic on two matrix-pairs without much slowdown. + + vzeroupper + + # x0..3[0-4] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vmovdqa %ymm0,%ymm4 + vmovdqa %ymm1,%ymm5 + vmovdqa %ymm2,%ymm6 + vmovdqa %ymm3,%ymm7 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + vpaddd CTR4BL(%rip),%ymm7,%ymm7 + + vmovdqa %ymm0,%ymm11 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm3,%ymm14 + vmovdqa %ymm7,%ymm15 + +.Ldoubleround4: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $16,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + vpshufd $0x39,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + vpshufd $0x93,%ymm7,%ymm7 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $16,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + vpshufd $0x93,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + vpshufd $0x39,%ymm7,%ymm7 + + sub $2,%r8d + jnz .Ldoubleround4 + + # o0 = i0 ^ (x0 + s0), first block + vpaddd %ymm11,%ymm0,%ymm10 + cmp $0x10,%rcx + jl .Lxorpart4 + vpxord 0x00(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x00(%rsi) + vextracti128 $1,%ymm10,%xmm0 + # o1 = i1 ^ (x1 + s1), first block + vpaddd %ymm12,%ymm1,%ymm10 + cmp $0x20,%rcx + jl .Lxorpart4 + vpxord 0x10(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x10(%rsi) + vextracti128 $1,%ymm10,%xmm1 + # o2 = i2 ^ (x2 + s2), first block + vpaddd %ymm13,%ymm2,%ymm10 + cmp $0x30,%rcx + jl .Lxorpart4 + vpxord 0x20(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x20(%rsi) + vextracti128 $1,%ymm10,%xmm2 + # o3 = i3 ^ (x3 + s3), first block + vpaddd %ymm14,%ymm3,%ymm10 + cmp $0x40,%rcx + jl .Lxorpart4 + vpxord 0x30(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x30(%rsi) + vextracti128 $1,%ymm10,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm10 + cmp $0x50,%rcx + jl .Lxorpart4 + vpxord 0x40(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x40(%rsi) + + vmovdqa %xmm1,%xmm10 + cmp $0x60,%rcx + jl .Lxorpart4 + vpxord 0x50(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x50(%rsi) + + vmovdqa %xmm2,%xmm10 + cmp $0x70,%rcx + jl .Lxorpart4 + vpxord 0x60(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x60(%rsi) + + vmovdqa %xmm3,%xmm10 + cmp $0x80,%rcx + jl .Lxorpart4 + vpxord 0x70(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x70(%rsi) + + # o0 = i0 ^ (x0 + s0), third block + vpaddd %ymm11,%ymm4,%ymm10 + cmp $0x90,%rcx + jl .Lxorpart4 + vpxord 0x80(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x80(%rsi) + vextracti128 $1,%ymm10,%xmm4 + # o1 = i1 ^ (x1 + s1), third block + vpaddd %ymm12,%ymm5,%ymm10 + cmp $0xa0,%rcx + jl .Lxorpart4 + vpxord 0x90(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x90(%rsi) + vextracti128 $1,%ymm10,%xmm5 + # o2 = i2 ^ (x2 + s2), third block + vpaddd %ymm13,%ymm6,%ymm10 + cmp $0xb0,%rcx + jl .Lxorpart4 + vpxord 0xa0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xa0(%rsi) + vextracti128 $1,%ymm10,%xmm6 + # o3 = i3 ^ (x3 + s3), third block + vpaddd %ymm15,%ymm7,%ymm10 + cmp $0xc0,%rcx + jl .Lxorpart4 + vpxord 0xb0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xb0(%rsi) + vextracti128 $1,%ymm10,%xmm7 + + # xor and write fourth block + vmovdqa %xmm4,%xmm10 + cmp $0xd0,%rcx + jl .Lxorpart4 + vpxord 0xc0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xc0(%rsi) + + vmovdqa %xmm5,%xmm10 + cmp $0xe0,%rcx + jl .Lxorpart4 + vpxord 0xd0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xd0(%rsi) + + vmovdqa %xmm6,%xmm10 + cmp $0xf0,%rcx + jl .Lxorpart4 + vpxord 0xe0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xe0(%rsi) + + vmovdqa %xmm7,%xmm10 + cmp $0x100,%rcx + jl .Lxorpart4 + vpxord 0xf0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xf0(%rsi) + +.Ldone4: + vzeroupper + RET + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx + jz .Ldone4 + mov %rax,%r9 + and $~0xf,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z} + vpxord %xmm10,%xmm1,%xmm1 + vmovdqu8 %xmm1,(%rsi,%r9){%k1} + + jmp .Ldone4 + +SYM_FUNC_END(chacha_4block_xor_avx512vl) + +SYM_FUNC_START(chacha_8block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 8 data blocks output, o + # %rdx: up to 8 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts eight consecutive ChaCha blocks by loading + # the state matrix in AVX registers eight times. Compared to AVX2, this + # mostly benefits from the new rotate instructions in VL and the + # additional registers. + + vzeroupper + + # x0..15[0-7] = s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpbroadcastd 0x04(%rdi),%ymm1 + vpbroadcastd 0x08(%rdi),%ymm2 + vpbroadcastd 0x0c(%rdi),%ymm3 + vpbroadcastd 0x10(%rdi),%ymm4 + vpbroadcastd 0x14(%rdi),%ymm5 + vpbroadcastd 0x18(%rdi),%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm7 + vpbroadcastd 0x20(%rdi),%ymm8 + vpbroadcastd 0x24(%rdi),%ymm9 + vpbroadcastd 0x28(%rdi),%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm11 + vpbroadcastd 0x30(%rdi),%ymm12 + vpbroadcastd 0x34(%rdi),%ymm13 + vpbroadcastd 0x38(%rdi),%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm15 + + # x12 += counter values 0-3 + vpaddd CTR8BL(%rip),%ymm12,%ymm12 + + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm1,%ymm17 + vmovdqa64 %ymm2,%ymm18 + vmovdqa64 %ymm3,%ymm19 + vmovdqa64 %ymm4,%ymm20 + vmovdqa64 %ymm5,%ymm21 + vmovdqa64 %ymm6,%ymm22 + vmovdqa64 %ymm7,%ymm23 + vmovdqa64 %ymm8,%ymm24 + vmovdqa64 %ymm9,%ymm25 + vmovdqa64 %ymm10,%ymm26 + vmovdqa64 %ymm11,%ymm27 + vmovdqa64 %ymm12,%ymm28 + vmovdqa64 %ymm13,%ymm29 + vmovdqa64 %ymm14,%ymm30 + vmovdqa64 %ymm15,%ymm31 + +.Ldoubleround8: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + vpaddd %ymm0,%ymm4,%ymm0 + vpxord %ymm0,%ymm12,%ymm12 + vprold $16,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + vpaddd %ymm1,%ymm5,%ymm1 + vpxord %ymm1,%ymm13,%ymm13 + vprold $16,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + vpaddd %ymm2,%ymm6,%ymm2 + vpxord %ymm2,%ymm14,%ymm14 + vprold $16,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vpaddd %ymm3,%ymm7,%ymm3 + vpxord %ymm3,%ymm15,%ymm15 + vprold $16,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + vpaddd %ymm12,%ymm8,%ymm8 + vpxord %ymm8,%ymm4,%ymm4 + vprold $12,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + vpaddd %ymm13,%ymm9,%ymm9 + vpxord %ymm9,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + vpaddd %ymm14,%ymm10,%ymm10 + vpxord %ymm10,%ymm6,%ymm6 + vprold $12,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vpaddd %ymm15,%ymm11,%ymm11 + vpxord %ymm11,%ymm7,%ymm7 + vprold $12,%ymm7,%ymm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + vpaddd %ymm0,%ymm4,%ymm0 + vpxord %ymm0,%ymm12,%ymm12 + vprold $8,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + vpaddd %ymm1,%ymm5,%ymm1 + vpxord %ymm1,%ymm13,%ymm13 + vprold $8,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + vpaddd %ymm2,%ymm6,%ymm2 + vpxord %ymm2,%ymm14,%ymm14 + vprold $8,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vpaddd %ymm3,%ymm7,%ymm3 + vpxord %ymm3,%ymm15,%ymm15 + vprold $8,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + vpaddd %ymm12,%ymm8,%ymm8 + vpxord %ymm8,%ymm4,%ymm4 + vprold $7,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + vpaddd %ymm13,%ymm9,%ymm9 + vpxord %ymm9,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + vpaddd %ymm14,%ymm10,%ymm10 + vpxord %ymm10,%ymm6,%ymm6 + vprold $7,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vpaddd %ymm15,%ymm11,%ymm11 + vpxord %ymm11,%ymm7,%ymm7 + vprold $7,%ymm7,%ymm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + vpaddd %ymm0,%ymm5,%ymm0 + vpxord %ymm0,%ymm15,%ymm15 + vprold $16,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16) + vpaddd %ymm1,%ymm6,%ymm1 + vpxord %ymm1,%ymm12,%ymm12 + vprold $16,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + vpaddd %ymm2,%ymm7,%ymm2 + vpxord %ymm2,%ymm13,%ymm13 + vprold $16,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vpaddd %ymm3,%ymm4,%ymm3 + vpxord %ymm3,%ymm14,%ymm14 + vprold $16,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + vpaddd %ymm15,%ymm10,%ymm10 + vpxord %ymm10,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + vpaddd %ymm12,%ymm11,%ymm11 + vpxord %ymm11,%ymm6,%ymm6 + vprold $12,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + vpaddd %ymm13,%ymm8,%ymm8 + vpxord %ymm8,%ymm7,%ymm7 + vprold $12,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vpaddd %ymm14,%ymm9,%ymm9 + vpxord %ymm9,%ymm4,%ymm4 + vprold $12,%ymm4,%ymm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + vpaddd %ymm0,%ymm5,%ymm0 + vpxord %ymm0,%ymm15,%ymm15 + vprold $8,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + vpaddd %ymm1,%ymm6,%ymm1 + vpxord %ymm1,%ymm12,%ymm12 + vprold $8,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + vpaddd %ymm2,%ymm7,%ymm2 + vpxord %ymm2,%ymm13,%ymm13 + vprold $8,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vpaddd %ymm3,%ymm4,%ymm3 + vpxord %ymm3,%ymm14,%ymm14 + vprold $8,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + vpaddd %ymm15,%ymm10,%ymm10 + vpxord %ymm10,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + vpaddd %ymm12,%ymm11,%ymm11 + vpxord %ymm11,%ymm6,%ymm6 + vprold $7,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + vpaddd %ymm13,%ymm8,%ymm8 + vpxord %ymm8,%ymm7,%ymm7 + vprold $7,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vpaddd %ymm14,%ymm9,%ymm9 + vpxord %ymm9,%ymm4,%ymm4 + vprold $7,%ymm4,%ymm4 + + sub $2,%r8d + jnz .Ldoubleround8 + + # x0..15[0-3] += s[0..15] + vpaddd %ymm16,%ymm0,%ymm0 + vpaddd %ymm17,%ymm1,%ymm1 + vpaddd %ymm18,%ymm2,%ymm2 + vpaddd %ymm19,%ymm3,%ymm3 + vpaddd %ymm20,%ymm4,%ymm4 + vpaddd %ymm21,%ymm5,%ymm5 + vpaddd %ymm22,%ymm6,%ymm6 + vpaddd %ymm23,%ymm7,%ymm7 + vpaddd %ymm24,%ymm8,%ymm8 + vpaddd %ymm25,%ymm9,%ymm9 + vpaddd %ymm26,%ymm10,%ymm10 + vpaddd %ymm27,%ymm11,%ymm11 + vpaddd %ymm28,%ymm12,%ymm12 + vpaddd %ymm29,%ymm13,%ymm13 + vpaddd %ymm30,%ymm14,%ymm14 + vpaddd %ymm31,%ymm15,%ymm15 + + # interleave 32-bit words in state n, n+1 + vpunpckldq %ymm1,%ymm0,%ymm16 + vpunpckhdq %ymm1,%ymm0,%ymm17 + vpunpckldq %ymm3,%ymm2,%ymm18 + vpunpckhdq %ymm3,%ymm2,%ymm19 + vpunpckldq %ymm5,%ymm4,%ymm20 + vpunpckhdq %ymm5,%ymm4,%ymm21 + vpunpckldq %ymm7,%ymm6,%ymm22 + vpunpckhdq %ymm7,%ymm6,%ymm23 + vpunpckldq %ymm9,%ymm8,%ymm24 + vpunpckhdq %ymm9,%ymm8,%ymm25 + vpunpckldq %ymm11,%ymm10,%ymm26 + vpunpckhdq %ymm11,%ymm10,%ymm27 + vpunpckldq %ymm13,%ymm12,%ymm28 + vpunpckhdq %ymm13,%ymm12,%ymm29 + vpunpckldq %ymm15,%ymm14,%ymm30 + vpunpckhdq %ymm15,%ymm14,%ymm31 + + # interleave 64-bit words in state n, n+2 + vpunpcklqdq %ymm18,%ymm16,%ymm0 + vpunpcklqdq %ymm19,%ymm17,%ymm1 + vpunpckhqdq %ymm18,%ymm16,%ymm2 + vpunpckhqdq %ymm19,%ymm17,%ymm3 + vpunpcklqdq %ymm22,%ymm20,%ymm4 + vpunpcklqdq %ymm23,%ymm21,%ymm5 + vpunpckhqdq %ymm22,%ymm20,%ymm6 + vpunpckhqdq %ymm23,%ymm21,%ymm7 + vpunpcklqdq %ymm26,%ymm24,%ymm8 + vpunpcklqdq %ymm27,%ymm25,%ymm9 + vpunpckhqdq %ymm26,%ymm24,%ymm10 + vpunpckhqdq %ymm27,%ymm25,%ymm11 + vpunpcklqdq %ymm30,%ymm28,%ymm12 + vpunpcklqdq %ymm31,%ymm29,%ymm13 + vpunpckhqdq %ymm30,%ymm28,%ymm14 + vpunpckhqdq %ymm31,%ymm29,%ymm15 + + # interleave 128-bit words in state n, n+4 + # xor/write first four blocks + vmovdqa64 %ymm0,%ymm16 + vperm2i128 $0x20,%ymm4,%ymm0,%ymm0 + cmp $0x0020,%rcx + jl .Lxorpart8 + vpxord 0x0000(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0000(%rsi) + vmovdqa64 %ymm16,%ymm0 + vperm2i128 $0x31,%ymm4,%ymm0,%ymm4 + + vperm2i128 $0x20,%ymm12,%ymm8,%ymm0 + cmp $0x0040,%rcx + jl .Lxorpart8 + vpxord 0x0020(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0020(%rsi) + vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 + + vperm2i128 $0x20,%ymm6,%ymm2,%ymm0 + cmp $0x0060,%rcx + jl .Lxorpart8 + vpxord 0x0040(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0040(%rsi) + vperm2i128 $0x31,%ymm6,%ymm2,%ymm6 + + vperm2i128 $0x20,%ymm14,%ymm10,%ymm0 + cmp $0x0080,%rcx + jl .Lxorpart8 + vpxord 0x0060(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0060(%rsi) + vperm2i128 $0x31,%ymm14,%ymm10,%ymm14 + + vperm2i128 $0x20,%ymm5,%ymm1,%ymm0 + cmp $0x00a0,%rcx + jl .Lxorpart8 + vpxord 0x0080(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0080(%rsi) + vperm2i128 $0x31,%ymm5,%ymm1,%ymm5 + + vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 + cmp $0x00c0,%rcx + jl .Lxorpart8 + vpxord 0x00a0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00a0(%rsi) + vperm2i128 $0x31,%ymm13,%ymm9,%ymm13 + + vperm2i128 $0x20,%ymm7,%ymm3,%ymm0 + cmp $0x00e0,%rcx + jl .Lxorpart8 + vpxord 0x00c0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00c0(%rsi) + vperm2i128 $0x31,%ymm7,%ymm3,%ymm7 + + vperm2i128 $0x20,%ymm15,%ymm11,%ymm0 + cmp $0x0100,%rcx + jl .Lxorpart8 + vpxord 0x00e0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00e0(%rsi) + vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 + + # xor remaining blocks, write to output + vmovdqa64 %ymm4,%ymm0 + cmp $0x0120,%rcx + jl .Lxorpart8 + vpxord 0x0100(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0100(%rsi) + + vmovdqa64 %ymm12,%ymm0 + cmp $0x0140,%rcx + jl .Lxorpart8 + vpxord 0x0120(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0120(%rsi) + + vmovdqa64 %ymm6,%ymm0 + cmp $0x0160,%rcx + jl .Lxorpart8 + vpxord 0x0140(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0140(%rsi) + + vmovdqa64 %ymm14,%ymm0 + cmp $0x0180,%rcx + jl .Lxorpart8 + vpxord 0x0160(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0160(%rsi) + + vmovdqa64 %ymm5,%ymm0 + cmp $0x01a0,%rcx + jl .Lxorpart8 + vpxord 0x0180(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0180(%rsi) + + vmovdqa64 %ymm13,%ymm0 + cmp $0x01c0,%rcx + jl .Lxorpart8 + vpxord 0x01a0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01a0(%rsi) + + vmovdqa64 %ymm7,%ymm0 + cmp $0x01e0,%rcx + jl .Lxorpart8 + vpxord 0x01c0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01c0(%rsi) + + vmovdqa64 %ymm15,%ymm0 + cmp $0x0200,%rcx + jl .Lxorpart8 + vpxord 0x01e0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01e0(%rsi) + +.Ldone8: + vzeroupper + RET + +.Lxorpart8: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0x1f,%rcx + jz .Ldone8 + mov %rax,%r9 + and $~0x1f,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%ymm1{%k1}{z} + vpxord %ymm0,%ymm1,%ymm1 + vmovdqu8 %ymm1,(%rsi,%r9){%k1} + + jmp .Ldone8 + +SYM_FUNC_END(chacha_8block_xor_avx512vl) diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S new file mode 100644 index 000000000..7111949cd --- /dev/null +++ b/arch/x86/crypto/chacha-ssse3-x86_64.S @@ -0,0 +1,791 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ChaCha 256-bit cipher algorithm, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include + +.section .rodata.cst16.ROT8, "aM", @progbits, 16 +.align 16 +ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 +ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 +.section .rodata.cst16.CTRINC, "aM", @progbits, 16 +.align 16 +CTRINC: .octa 0x00000003000000020000000100000000 + +.text + +/* + * chacha_permute - permute one block + * + * Permute one 64-byte block where the state matrix is in %xmm0-%xmm3. This + * function performs matrix operations on four words in parallel, but requires + * shuffling to rearrange the words after each round. 8/16-bit word rotation is + * done with the slightly better performing SSSE3 byte shuffling, 7/12-bit word + * rotation uses traditional shift+OR. + * + * The round count is given in %r8d. + * + * Clobbers: %r8d, %xmm4-%xmm7 + */ +SYM_FUNC_START_LOCAL(chacha_permute) + + movdqa ROT8(%rip),%xmm4 + movdqa ROT16(%rip),%xmm5 + +.Ldoubleround: + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm5,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm6 + pslld $12,%xmm6 + psrld $20,%xmm1 + por %xmm6,%xmm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm4,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm7 + pslld $7,%xmm7 + psrld $25,%xmm1 + por %xmm7,%xmm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + pshufd $0x39,%xmm1,%xmm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + pshufd $0x4e,%xmm2,%xmm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + pshufd $0x93,%xmm3,%xmm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm5,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm6 + pslld $12,%xmm6 + psrld $20,%xmm1 + por %xmm6,%xmm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm4,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm7 + pslld $7,%xmm7 + psrld $25,%xmm1 + por %xmm7,%xmm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + pshufd $0x93,%xmm1,%xmm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + pshufd $0x4e,%xmm2,%xmm2 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + pshufd $0x39,%xmm3,%xmm3 + + sub $2,%r8d + jnz .Ldoubleround + + RET +SYM_FUNC_END(chacha_permute) + +SYM_FUNC_START(chacha_block_xor_ssse3) + # %rdi: Input state matrix, s + # %rsi: up to 1 data block output, o + # %rdx: up to 1 data block input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + FRAME_BEGIN + + # x0..3 = s0..3 + movdqu 0x00(%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqu 0x20(%rdi),%xmm2 + movdqu 0x30(%rdi),%xmm3 + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + movdqa %xmm2,%xmm10 + movdqa %xmm3,%xmm11 + + mov %rcx,%rax + call chacha_permute + + # o0 = i0 ^ (x0 + s0) + paddd %xmm8,%xmm0 + cmp $0x10,%rax + jl .Lxorpart + movdqu 0x00(%rdx),%xmm4 + pxor %xmm4,%xmm0 + movdqu %xmm0,0x00(%rsi) + # o1 = i1 ^ (x1 + s1) + paddd %xmm9,%xmm1 + movdqa %xmm1,%xmm0 + cmp $0x20,%rax + jl .Lxorpart + movdqu 0x10(%rdx),%xmm0 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x10(%rsi) + # o2 = i2 ^ (x2 + s2) + paddd %xmm10,%xmm2 + movdqa %xmm2,%xmm0 + cmp $0x30,%rax + jl .Lxorpart + movdqu 0x20(%rdx),%xmm0 + pxor %xmm2,%xmm0 + movdqu %xmm0,0x20(%rsi) + # o3 = i3 ^ (x3 + s3) + paddd %xmm11,%xmm3 + movdqa %xmm3,%xmm0 + cmp $0x40,%rax + jl .Lxorpart + movdqu 0x30(%rdx),%xmm0 + pxor %xmm3,%xmm0 + movdqu %xmm0,0x30(%rsi) + +.Ldone: + FRAME_END + RET + +.Lxorpart: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + pxor 0x00(%rsp),%xmm0 + movdqa %xmm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone + +SYM_FUNC_END(chacha_block_xor_ssse3) + +SYM_FUNC_START(hchacha_block_ssse3) + # %rdi: Input state matrix, s + # %rsi: output (8 32-bit words) + # %edx: nrounds + FRAME_BEGIN + + movdqu 0x00(%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqu 0x20(%rdi),%xmm2 + movdqu 0x30(%rdi),%xmm3 + + mov %edx,%r8d + call chacha_permute + + movdqu %xmm0,0x00(%rsi) + movdqu %xmm3,0x10(%rsi) + + FRAME_END + RET +SYM_FUNC_END(hchacha_block_ssse3) + +SYM_FUNC_START(chacha_4block_xor_ssse3) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts four consecutive ChaCha blocks by loading the + # the state matrix in SSE registers four times. As we need some scratch + # registers, we save the first four registers on the stack. The + # algorithm performs each operation on the corresponding word of each + # state matrix, hence requires no word shuffling. For final XORing step + # we transpose the matrix by interleaving 32- and then 64-bit words, + # which allows us to do XOR in SSE registers. 8/16-bit word rotation is + # done with the slightly better performing SSSE3 byte shuffling, + # 7/12-bit word rotation uses traditional shift+OR. + + lea 8(%rsp),%r10 + sub $0x80,%rsp + and $~63,%rsp + mov %rcx,%rax + + # x0..15[0-3] = s0..3[0..3] + movq 0x00(%rdi),%xmm1 + pshufd $0x00,%xmm1,%xmm0 + pshufd $0x55,%xmm1,%xmm1 + movq 0x08(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + movq 0x10(%rdi),%xmm5 + pshufd $0x00,%xmm5,%xmm4 + pshufd $0x55,%xmm5,%xmm5 + movq 0x18(%rdi),%xmm7 + pshufd $0x00,%xmm7,%xmm6 + pshufd $0x55,%xmm7,%xmm7 + movq 0x20(%rdi),%xmm9 + pshufd $0x00,%xmm9,%xmm8 + pshufd $0x55,%xmm9,%xmm9 + movq 0x28(%rdi),%xmm11 + pshufd $0x00,%xmm11,%xmm10 + pshufd $0x55,%xmm11,%xmm11 + movq 0x30(%rdi),%xmm13 + pshufd $0x00,%xmm13,%xmm12 + pshufd $0x55,%xmm13,%xmm13 + movq 0x38(%rdi),%xmm15 + pshufd $0x00,%xmm15,%xmm14 + pshufd $0x55,%xmm15,%xmm15 + # x0..3 on stack + movdqa %xmm0,0x00(%rsp) + movdqa %xmm1,0x10(%rsp) + movdqa %xmm2,0x20(%rsp) + movdqa %xmm3,0x30(%rsp) + + movdqa CTRINC(%rip),%xmm1 + movdqa ROT8(%rip),%xmm2 + movdqa ROT16(%rip),%xmm3 + + # x12 += counter values 0-3 + paddd %xmm1,%xmm12 + +.Ldoubleround4: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm3,%xmm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm3,%xmm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm3,%xmm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm3,%xmm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm4 + por %xmm0,%xmm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm5 + por %xmm0,%xmm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm6 + por %xmm0,%xmm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm7 + por %xmm0,%xmm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm2,%xmm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm2,%xmm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm2,%xmm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm2,%xmm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm4 + por %xmm0,%xmm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm5 + por %xmm0,%xmm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm6 + por %xmm0,%xmm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm7 + por %xmm0,%xmm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm3,%xmm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm3,%xmm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm3,%xmm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 16) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm3,%xmm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + paddd %xmm15,%xmm10 + pxor %xmm10,%xmm5 + movdqa %xmm5,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm5 + por %xmm0,%xmm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + paddd %xmm12,%xmm11 + pxor %xmm11,%xmm6 + movdqa %xmm6,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm6 + por %xmm0,%xmm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + paddd %xmm13,%xmm8 + pxor %xmm8,%xmm7 + movdqa %xmm7,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm7 + por %xmm0,%xmm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + paddd %xmm14,%xmm9 + pxor %xmm9,%xmm4 + movdqa %xmm4,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm4 + por %xmm0,%xmm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm2,%xmm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm2,%xmm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm2,%xmm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm2,%xmm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + paddd %xmm15,%xmm10 + pxor %xmm10,%xmm5 + movdqa %xmm5,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm5 + por %xmm0,%xmm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + paddd %xmm12,%xmm11 + pxor %xmm11,%xmm6 + movdqa %xmm6,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm6 + por %xmm0,%xmm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + paddd %xmm13,%xmm8 + pxor %xmm8,%xmm7 + movdqa %xmm7,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm7 + por %xmm0,%xmm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + paddd %xmm14,%xmm9 + pxor %xmm9,%xmm4 + movdqa %xmm4,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm4 + por %xmm0,%xmm4 + + sub $2,%r8d + jnz .Ldoubleround4 + + # x0[0-3] += s0[0] + # x1[0-3] += s0[1] + movq 0x00(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd 0x00(%rsp),%xmm2 + movdqa %xmm2,0x00(%rsp) + paddd 0x10(%rsp),%xmm3 + movdqa %xmm3,0x10(%rsp) + # x2[0-3] += s0[2] + # x3[0-3] += s0[3] + movq 0x08(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd 0x20(%rsp),%xmm2 + movdqa %xmm2,0x20(%rsp) + paddd 0x30(%rsp),%xmm3 + movdqa %xmm3,0x30(%rsp) + + # x4[0-3] += s1[0] + # x5[0-3] += s1[1] + movq 0x10(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + # x6[0-3] += s1[2] + # x7[0-3] += s1[3] + movq 0x18(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + + # x8[0-3] += s2[0] + # x9[0-3] += s2[1] + movq 0x20(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm8 + paddd %xmm3,%xmm9 + # x10[0-3] += s2[2] + # x11[0-3] += s2[3] + movq 0x28(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm10 + paddd %xmm3,%xmm11 + + # x12[0-3] += s3[0] + # x13[0-3] += s3[1] + movq 0x30(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm12 + paddd %xmm3,%xmm13 + # x14[0-3] += s3[2] + # x15[0-3] += s3[3] + movq 0x38(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm14 + paddd %xmm3,%xmm15 + + # x12 += counter values 0-3 + paddd %xmm1,%xmm12 + + # interleave 32-bit words in state n, n+1 + movdqa 0x00(%rsp),%xmm0 + movdqa 0x10(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpckldq %xmm1,%xmm2 + punpckhdq %xmm1,%xmm0 + movdqa %xmm2,0x00(%rsp) + movdqa %xmm0,0x10(%rsp) + movdqa 0x20(%rsp),%xmm0 + movdqa 0x30(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpckldq %xmm1,%xmm2 + punpckhdq %xmm1,%xmm0 + movdqa %xmm2,0x20(%rsp) + movdqa %xmm0,0x30(%rsp) + movdqa %xmm4,%xmm0 + punpckldq %xmm5,%xmm4 + punpckhdq %xmm5,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm6,%xmm0 + punpckldq %xmm7,%xmm6 + punpckhdq %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + movdqa %xmm8,%xmm0 + punpckldq %xmm9,%xmm8 + punpckhdq %xmm9,%xmm0 + movdqa %xmm0,%xmm9 + movdqa %xmm10,%xmm0 + punpckldq %xmm11,%xmm10 + punpckhdq %xmm11,%xmm0 + movdqa %xmm0,%xmm11 + movdqa %xmm12,%xmm0 + punpckldq %xmm13,%xmm12 + punpckhdq %xmm13,%xmm0 + movdqa %xmm0,%xmm13 + movdqa %xmm14,%xmm0 + punpckldq %xmm15,%xmm14 + punpckhdq %xmm15,%xmm0 + movdqa %xmm0,%xmm15 + + # interleave 64-bit words in state n, n+2 + movdqa 0x00(%rsp),%xmm0 + movdqa 0x20(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpcklqdq %xmm1,%xmm2 + punpckhqdq %xmm1,%xmm0 + movdqa %xmm2,0x00(%rsp) + movdqa %xmm0,0x20(%rsp) + movdqa 0x10(%rsp),%xmm0 + movdqa 0x30(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpcklqdq %xmm1,%xmm2 + punpckhqdq %xmm1,%xmm0 + movdqa %xmm2,0x10(%rsp) + movdqa %xmm0,0x30(%rsp) + movdqa %xmm4,%xmm0 + punpcklqdq %xmm6,%xmm4 + punpckhqdq %xmm6,%xmm0 + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + punpcklqdq %xmm7,%xmm5 + punpckhqdq %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + movdqa %xmm8,%xmm0 + punpcklqdq %xmm10,%xmm8 + punpckhqdq %xmm10,%xmm0 + movdqa %xmm0,%xmm10 + movdqa %xmm9,%xmm0 + punpcklqdq %xmm11,%xmm9 + punpckhqdq %xmm11,%xmm0 + movdqa %xmm0,%xmm11 + movdqa %xmm12,%xmm0 + punpcklqdq %xmm14,%xmm12 + punpckhqdq %xmm14,%xmm0 + movdqa %xmm0,%xmm14 + movdqa %xmm13,%xmm0 + punpcklqdq %xmm15,%xmm13 + punpckhqdq %xmm15,%xmm0 + movdqa %xmm0,%xmm15 + + # xor with corresponding input, write to output + movdqa 0x00(%rsp),%xmm0 + cmp $0x10,%rax + jl .Lxorpart4 + movdqu 0x00(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x00(%rsi) + + movdqu %xmm4,%xmm0 + cmp $0x20,%rax + jl .Lxorpart4 + movdqu 0x10(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x10(%rsi) + + movdqu %xmm8,%xmm0 + cmp $0x30,%rax + jl .Lxorpart4 + movdqu 0x20(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x20(%rsi) + + movdqu %xmm12,%xmm0 + cmp $0x40,%rax + jl .Lxorpart4 + movdqu 0x30(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x30(%rsi) + + movdqa 0x20(%rsp),%xmm0 + cmp $0x50,%rax + jl .Lxorpart4 + movdqu 0x40(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x40(%rsi) + + movdqu %xmm6,%xmm0 + cmp $0x60,%rax + jl .Lxorpart4 + movdqu 0x50(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x50(%rsi) + + movdqu %xmm10,%xmm0 + cmp $0x70,%rax + jl .Lxorpart4 + movdqu 0x60(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x60(%rsi) + + movdqu %xmm14,%xmm0 + cmp $0x80,%rax + jl .Lxorpart4 + movdqu 0x70(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x70(%rsi) + + movdqa 0x10(%rsp),%xmm0 + cmp $0x90,%rax + jl .Lxorpart4 + movdqu 0x80(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x80(%rsi) + + movdqu %xmm5,%xmm0 + cmp $0xa0,%rax + jl .Lxorpart4 + movdqu 0x90(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x90(%rsi) + + movdqu %xmm9,%xmm0 + cmp $0xb0,%rax + jl .Lxorpart4 + movdqu 0xa0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xa0(%rsi) + + movdqu %xmm13,%xmm0 + cmp $0xc0,%rax + jl .Lxorpart4 + movdqu 0xb0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xb0(%rsi) + + movdqa 0x30(%rsp),%xmm0 + cmp $0xd0,%rax + jl .Lxorpart4 + movdqu 0xc0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xc0(%rsi) + + movdqu %xmm7,%xmm0 + cmp $0xe0,%rax + jl .Lxorpart4 + movdqu 0xd0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xd0(%rsi) + + movdqu %xmm11,%xmm0 + cmp $0xf0,%rax + jl .Lxorpart4 + movdqu 0xe0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xe0(%rsi) + + movdqu %xmm15,%xmm0 + cmp $0x100,%rax + jl .Lxorpart4 + movdqu 0xf0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xf0(%rsi) + +.Ldone4: + lea -8(%r10),%rsp + RET + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone4 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + pxor 0x00(%rsp),%xmm0 + movdqa %xmm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + jmp .Ldone4 + +SYM_FUNC_END(chacha_4block_xor_ssse3) diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c new file mode 100644 index 000000000..7b3a1cf09 --- /dev/null +++ b/arch/x86/crypto/chacha_glue.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * x64 SIMD accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); + +asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); + +static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) +{ + len = min(len, maxblocks * CHACHA_BLOCK_SIZE); + return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; +} + +static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&chacha_use_avx512vl)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_2block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 2); + return; + } + } + + if (static_branch_likely(&chacha_use_avx2)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 2); + return; + } + } + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); + state[12]++; + } +} + +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, stream, nrounds); + kernel_fpu_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || + bytes <= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, todo, nrounds); + kernel_fpu_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +static int chacha_simd_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv) +{ + u32 state[CHACHA_STATE_WORDS] __aligned(8); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + if (!static_branch_likely(&chacha_use_simd) || + !crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { + kernel_fpu_begin(); + chacha_dosimd(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + kernel_fpu_end(); + } + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int chacha_simd(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_simd_stream_xor(req, ctx, req->iv); +} + +static int xchacha_simd(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 state[CHACHA_STATE_WORDS] __aligned(8); + struct chacha_ctx subctx; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { + kernel_fpu_begin(); + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + kernel_fpu_end(); + } else { + hchacha_block_generic(state, subctx.key, ctx->nrounds); + } + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_simd_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_simd, + .decrypt = chacha_simd, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_simd, + .decrypt = xchacha_simd, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_simd, + .decrypt = xchacha_simd, + }, +}; + +static int __init chacha_simd_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return 0; + + static_branch_enable(&chacha_use_simd); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } + return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? + crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; +} + +static void __exit chacha_simd_mod_fini(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi "); +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-simd"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-simd"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-simd"); diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S new file mode 100644 index 000000000..ca53e9699 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. + * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas + * Alexander Boyko + */ + +#include + + +.section .rodata +.align 16 +/* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ +.Lconstant_R2R1: + .octa 0x00000001c6e415960000000154442bd4 +/* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ +.Lconstant_R4R3: + .octa 0x00000000ccaa009e00000001751997d0 +/* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ +.Lconstant_R5: + .octa 0x00000000000000000000000163cd6124 +.Lconstant_mask32: + .octa 0x000000000000000000000000FFFFFFFF +/* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ +.Lconstant_RUpoly: + .octa 0x00000001F701164100000001DB710641 + +#define CONSTANT %xmm0 + +#ifdef __x86_64__ +#define BUF %rdi +#define LEN %rsi +#define CRC %edx +#else +#define BUF %eax +#define LEN %edx +#define CRC %ecx +#endif + + + +.text +/** + * Calculate crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pclmul_le_16(unsigned char const *buffer, + * size_t len, uint crc32) + */ + +SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ + movdqa (BUF), %xmm1 + movdqa 0x10(BUF), %xmm2 + movdqa 0x20(BUF), %xmm3 + movdqa 0x30(BUF), %xmm4 + movd CRC, CONSTANT + pxor CONSTANT, %xmm1 + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jb less_64 + +#ifdef __x86_64__ + movdqa .Lconstant_R2R1(%rip), CONSTANT +#else + movdqa .Lconstant_R2R1, CONSTANT +#endif + +loop_64:/* 64 bytes Full cache line folding */ + prefetchnta 0x40(BUF) + movdqa %xmm1, %xmm5 + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 +#ifdef __x86_64__ + movdqa %xmm4, %xmm8 +#endif + pclmulqdq $0x00, CONSTANT, %xmm1 + pclmulqdq $0x00, CONSTANT, %xmm2 + pclmulqdq $0x00, CONSTANT, %xmm3 +#ifdef __x86_64__ + pclmulqdq $0x00, CONSTANT, %xmm4 +#endif + pclmulqdq $0x11, CONSTANT, %xmm5 + pclmulqdq $0x11, CONSTANT, %xmm6 + pclmulqdq $0x11, CONSTANT, %xmm7 +#ifdef __x86_64__ + pclmulqdq $0x11, CONSTANT, %xmm8 +#endif + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 +#ifdef __x86_64__ + pxor %xmm8, %xmm4 +#else + /* xmm8 unsupported for x32 */ + movdqa %xmm4, %xmm5 + pclmulqdq $0x00, CONSTANT, %xmm4 + pclmulqdq $0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm4 +#endif + + pxor (BUF), %xmm1 + pxor 0x10(BUF), %xmm2 + pxor 0x20(BUF), %xmm3 + pxor 0x30(BUF), %xmm4 + + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jge loop_64 +less_64:/* Folding cache line into 128bit */ +#ifdef __x86_64__ + movdqa .Lconstant_R4R3(%rip), CONSTANT +#else + movdqa .Lconstant_R4R3, CONSTANT +#endif + prefetchnta (BUF) + + movdqa %xmm1, %xmm5 + pclmulqdq $0x00, CONSTANT, %xmm1 + pclmulqdq $0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm2, %xmm1 + + movdqa %xmm1, %xmm5 + pclmulqdq $0x00, CONSTANT, %xmm1 + pclmulqdq $0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm3, %xmm1 + + movdqa %xmm1, %xmm5 + pclmulqdq $0x00, CONSTANT, %xmm1 + pclmulqdq $0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + + cmp $0x10, LEN + jb fold_64 +loop_16:/* Folding rest buffer into 128bit */ + movdqa %xmm1, %xmm5 + pclmulqdq $0x00, CONSTANT, %xmm1 + pclmulqdq $0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor (BUF), %xmm1 + sub $0x10, LEN + add $0x10, BUF + cmp $0x10, LEN + jge loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ + psrldq $0x08, %xmm1 + pxor CONSTANT, %xmm1 + + /* final 32-bit fold */ + movdqa %xmm1, %xmm2 +#ifdef __x86_64__ + movdqa .Lconstant_R5(%rip), CONSTANT + movdqa .Lconstant_mask32(%rip), %xmm3 +#else + movdqa .Lconstant_R5, CONSTANT + movdqa .Lconstant_mask32, %xmm3 +#endif + psrldq $0x04, %xmm2 + pand %xmm3, %xmm1 + pclmulqdq $0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ +#ifdef __x86_64__ + movdqa .Lconstant_RUpoly(%rip), CONSTANT +#else + movdqa .Lconstant_RUpoly, CONSTANT +#endif + movdqa %xmm1, %xmm2 + pand %xmm3, %xmm1 + pclmulqdq $0x10, CONSTANT, %xmm1 + pand %xmm3, %xmm1 + pclmulqdq $0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + + RET +SYM_FUNC_END(crc32_pclmul_le_16) diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c new file mode 100644 index 000000000..98cf3b4e4 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -0,0 +1,201 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); + +static u32 __attribute__((pure)) + crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(p, iquotient, crc); + kernel_fpu_end(); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} + +static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) + return -EINVAL; + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pclmul_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_pclmul_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); + return 0; +} + +static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32_pclmul_setkey, + .init = crc32_pclmul_init, + .update = crc32_pclmul_update, + .final = crc32_pclmul_final, + .finup = crc32_pclmul_finup, + .digest = crc32_pclmul_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + } +}; + +static const struct x86_cpu_id crc32pclmul_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); + + +static int __init crc32_pclmul_mod_init(void) +{ + + if (!x86_match_cpu(crc32pclmul_cpu_id)) { + pr_info("PCLMULQDQ-NI instructions are not detected.\n"); + return -ENODEV; + } + return crypto_register_shash(&alg); +} + +static void __exit crc32_pclmul_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32_pclmul_mod_init); +module_exit(crc32_pclmul_mod_fini); + +MODULE_AUTHOR("Alexander Boyko "); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c new file mode 100644 index 000000000..feccb5254 --- /dev/null +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. + * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang + * Kent Liu + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define SCALE_F sizeof(unsigned long) + +#ifdef CONFIG_X86_64 +#define CRC32_INST "crc32q %1, %q0" +#else +#define CRC32_INST "crc32l %1, %0" +#endif + +#ifdef CONFIG_X86_64 +/* + * use carryless multiply version of crc32c when buffer + * size is >= 512 to account + * for fpu state save/restore overhead. + */ +#define CRC32C_PCL_BREAKEVEN 512 + +asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, + unsigned int crc_init); +#endif /* CONFIG_X86_64 */ + +static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) +{ + while (length--) { + asm("crc32b %1, %0" + : "+r" (crc) : "rm" (*data)); + data++; + } + + return crc; +} + +static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient = len / SCALE_F; + unsigned int iremainder = len % SCALE_F; + unsigned long *ptmp = (unsigned long *)p; + + while (iquotient--) { + asm(CRC32_INST + : "+r" (crc) : "rm" (*ptmp)); + ptmp++; + } + + if (iremainder) + crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) + return -EINVAL; + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32c_intel_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32c_intel_le_hw(*crcp, data, len); + return 0; +} + +static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); + return 0; +} + +static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_intel_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = ~cpu_to_le32p(crcp); + return 0; +} + +static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static int crc32c_intel_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + + return 0; +} + +#ifdef CONFIG_X86_64 +static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + /* + * use faster PCL version if datasize is large enough to + * overcome kernel fpu state save/restore overhead + */ + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { + kernel_fpu_begin(); + *crcp = crc_pcl(data, len, *crcp); + kernel_fpu_end(); + } else + *crcp = crc32c_intel_le_hw(*crcp, data, len); + return 0; +} + +static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { + kernel_fpu_begin(); + *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); + kernel_fpu_end(); + } else + *(__le32 *)out = + ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); + return 0; +} + +static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} +#endif /* CONFIG_X86_64 */ + +static struct shash_alg alg = { + .setkey = crc32c_intel_setkey, + .init = crc32c_intel_init, + .update = crc32c_intel_update, + .final = crc32c_intel_final, + .finup = crc32c_intel_finup, + .digest = crc32c_intel_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-intel", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32c_intel_cra_init, + } +}; + +static const struct x86_cpu_id crc32c_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); + +static int __init crc32c_intel_mod_init(void) +{ + if (!x86_match_cpu(crc32c_cpu_id)) + return -ENODEV; +#ifdef CONFIG_X86_64 + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + alg.update = crc32c_pcl_intel_update; + alg.finup = crc32c_pcl_intel_finup; + alg.digest = crc32c_pcl_intel_digest; + } +#endif + return crypto_register_shash(&alg); +} + +static void __exit crc32c_intel_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_intel_mod_init); +module_exit(crc32c_intel_mod_fini); + +MODULE_AUTHOR("Austin Zhang , Kent Liu "); +MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S new file mode 100644 index 000000000..ec35915f0 --- /dev/null +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -0,0 +1,463 @@ +/* + * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) + * + * The white papers on CRC32C calculations with PCLMULQDQ instruction can be + * downloaded from: + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf + * + * Copyright (C) 2012 Intel Corporation. + * + * Authors: + * Wajdi Feghali + * James Guilford + * David Cote + * Tim Chen + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JMPTBL_ENTRY i +.quad crc_\i +.endm + +.macro JNC_LESS_THAN j + jnc less_than_\j +.endm + +# Define threshold where buffers are considered "small" and routed to more +# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so +# SMALL_SIZE can be no larger than 255. + +#define SMALL_SIZE 200 + +.if (SMALL_SIZE > 255) +.error "SMALL_ SIZE must be < 256" +.endif + +# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); + +.text +SYM_FUNC_START(crc_pcl) +#define bufp rdi +#define bufp_dw %edi +#define bufp_w %di +#define bufp_b %dil +#define bufptmp %rcx +#define block_0 %rcx +#define block_1 %rdx +#define block_2 %r11 +#define len %rsi +#define len_dw %esi +#define len_w %si +#define len_b %sil +#define crc_init_arg %rdx +#define tmp %rbx +#define crc_init %r8 +#define crc_init_dw %r8d +#define crc1 %r9 +#define crc2 %r10 + + pushq %rbx + pushq %rdi + pushq %rsi + + ## Move crc_init for Linux to a different + mov crc_init_arg, crc_init + + ################################################################ + ## 1) ALIGN: + ################################################################ + + mov %bufp, bufptmp # rdi = *buf + neg %bufp + and $7, %bufp # calculate the unalignment amount of + # the address + je proc_block # Skip if aligned + + ## If len is less than 8 and we're unaligned, we need to jump + ## to special code to avoid reading beyond the end of the buffer + cmp $8, len + jae do_align + # less_than_8 expects length in upper 3 bits of len_dw + # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] + shl $32-3+1, len_dw + jmp less_than_8_post_shl1 + +do_align: + #### Calculate CRC of unaligned bytes of the buffer (if any) + movq (bufptmp), tmp # load a quadward from the buffer + add %bufp, bufptmp # align buffer pointer for quadword + # processing + sub %bufp, len # update buffer length +align_loop: + crc32b %bl, crc_init_dw # compute crc32 of 1-byte + shr $8, tmp # get next byte + dec %bufp + jne align_loop + +proc_block: + + ################################################################ + ## 2) PROCESS BLOCKS: + ################################################################ + + ## compute num of bytes to be processed + movq len, tmp # save num bytes in tmp + + cmpq $128*24, len + jae full_block + +continue_block: + cmpq $SMALL_SIZE, len + jb small + + ## len < 128*24 + movq $2731, %rax # 2731 = ceil(2^16 / 24) + mul len_dw + shrq $16, %rax + + ## eax contains floor(bytes / 24) = num 24-byte chunks to do + + ## process rax 24-byte chunks (128 >= rax >= 0) + + ## compute end address of each block + ## block 0 (base addr + RAX * 8) + ## block 1 (base addr + RAX * 16) + ## block 2 (base addr + RAX * 24) + lea (bufptmp, %rax, 8), block_0 + lea (block_0, %rax, 8), block_1 + lea (block_1, %rax, 8), block_2 + + xor crc1, crc1 + xor crc2, crc2 + + ## branch into array + mov jump_table(,%rax,8), %bufp + JMP_NOSPEC bufp + + ################################################################ + ## 2a) PROCESS FULL BLOCKS: + ################################################################ +full_block: + movl $128,%eax + lea 128*8*2(block_0), block_1 + lea 128*8*3(block_0), block_2 + add $128*8*1, block_0 + + xor crc1,crc1 + xor crc2,crc2 + + # Fall thruogh into top of crc array (crc_128) + + ################################################################ + ## 3) CRC Array: + ################################################################ + +crc_array: + i=128 +.rept 128-1 +.altmacro +LABEL crc_ %i +.noaltmacro + ENDBR + crc32q -i*8(block_0), crc_init + crc32q -i*8(block_1), crc1 + crc32q -i*8(block_2), crc2 + i=(i-1) +.endr + +.altmacro +LABEL crc_ %i +.noaltmacro + ENDBR + crc32q -i*8(block_0), crc_init + crc32q -i*8(block_1), crc1 +# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet + + mov block_2, block_0 + + ################################################################ + ## 4) Combine three results: + ################################################################ + + lea (K_table-8)(%rip), %bufp # first entry is for idx 1 + shlq $3, %rax # rax *= 8 + pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2 + leal (%eax,%eax,2), %eax # rax *= 3 (total *24) + subq %rax, tmp # tmp -= rax*24 + + movq crc_init, %xmm1 # CRC for block 1 + pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 + + movq crc1, %xmm2 # CRC for block 2 + pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 + + pxor %xmm2,%xmm1 + movq %xmm1, %rax + xor -i*8(block_2), %rax + mov crc2, crc_init + crc32 %rax, crc_init + + ################################################################ + ## 5) Check for end: + ################################################################ + +LABEL crc_ 0 + ENDBR + mov tmp, len + cmp $128*24, tmp + jae full_block + cmp $24, tmp + jae continue_block + +less_than_24: + shl $32-4, len_dw # less_than_16 expects length + # in upper 4 bits of len_dw + jnc less_than_16 + crc32q (bufptmp), crc_init + crc32q 8(bufptmp), crc_init + jz do_return + add $16, bufptmp + # len is less than 8 if we got here + # less_than_8 expects length in upper 3 bits of len_dw + # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] + shl $2, len_dw + jmp less_than_8_post_shl1 + + ####################################################################### + ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) + ####################################################################### +small: + shl $32-8, len_dw # Prepare len_dw for less_than_256 + j=256 +.rept 5 # j = {256, 128, 64, 32, 16} +.altmacro +LABEL less_than_ %j # less_than_j: Length should be in + # upper lg(j) bits of len_dw + j=(j/2) + shl $1, len_dw # Get next MSB + JNC_LESS_THAN %j +.noaltmacro + i=0 +.rept (j/8) + crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data + i=i+8 +.endr + jz do_return # Return if remaining length is zero + add $j, bufptmp # Advance buf +.endr + +less_than_8: # Length should be stored in + # upper 3 bits of len_dw + shl $1, len_dw +less_than_8_post_shl1: + jnc less_than_4 + crc32l (bufptmp), crc_init_dw # CRC of 4 bytes + jz do_return # return if remaining data is zero + add $4, bufptmp +less_than_4: # Length should be stored in + # upper 2 bits of len_dw + shl $1, len_dw + jnc less_than_2 + crc32w (bufptmp), crc_init_dw # CRC of 2 bytes + jz do_return # return if remaining data is zero + add $2, bufptmp +less_than_2: # Length should be stored in the MSB + # of len_dw + shl $1, len_dw + jnc less_than_1 + crc32b (bufptmp), crc_init_dw # CRC of 1 byte +less_than_1: # Length should be zero +do_return: + movq crc_init, %rax + popq %rsi + popq %rdi + popq %rbx + RET +SYM_FUNC_END(crc_pcl) + +.section .rodata, "a", @progbits + ################################################################ + ## jump table Table is 129 entries x 2 bytes each + ################################################################ +.align 4 +jump_table: + i=0 +.rept 129 +.altmacro +JMPTBL_ENTRY %i +.noaltmacro + i=i+1 +.endr + + + ################################################################ + ## PCLMULQDQ tables + ## Table is 128 entries x 2 words (8 bytes) each + ################################################################ +.align 8 +K_table: + .long 0x493c7d27, 0x00000001 + .long 0xba4fc28e, 0x493c7d27 + .long 0xddc0152b, 0xf20c0dfe + .long 0x9e4addf8, 0xba4fc28e + .long 0x39d3b296, 0x3da6d0cb + .long 0x0715ce53, 0xddc0152b + .long 0x47db8317, 0x1c291d04 + .long 0x0d3b6092, 0x9e4addf8 + .long 0xc96cfdc0, 0x740eef02 + .long 0x878a92a7, 0x39d3b296 + .long 0xdaece73e, 0x083a6eec + .long 0xab7aff2a, 0x0715ce53 + .long 0x2162d385, 0xc49f4f67 + .long 0x83348832, 0x47db8317 + .long 0x299847d5, 0x2ad91c30 + .long 0xb9e02b86, 0x0d3b6092 + .long 0x18b33a4e, 0x6992cea2 + .long 0xb6dd949b, 0xc96cfdc0 + .long 0x78d9ccb7, 0x7e908048 + .long 0xbac2fd7b, 0x878a92a7 + .long 0xa60ce07b, 0x1b3d8f29 + .long 0xce7f39f4, 0xdaece73e + .long 0x61d82e56, 0xf1d0f55e + .long 0xd270f1a2, 0xab7aff2a + .long 0xc619809d, 0xa87ab8a8 + .long 0x2b3cac5d, 0x2162d385 + .long 0x65863b64, 0x8462d800 + .long 0x1b03397f, 0x83348832 + .long 0xebb883bd, 0x71d111a8 + .long 0xb3e32c28, 0x299847d5 + .long 0x064f7f26, 0xffd852c6 + .long 0xdd7e3b0c, 0xb9e02b86 + .long 0xf285651c, 0xdcb17aa4 + .long 0x10746f3c, 0x18b33a4e + .long 0xc7a68855, 0xf37c5aee + .long 0x271d9844, 0xb6dd949b + .long 0x8e766a0c, 0x6051d5a2 + .long 0x93a5f730, 0x78d9ccb7 + .long 0x6cb08e5c, 0x18b0d4ff + .long 0x6b749fb2, 0xbac2fd7b + .long 0x1393e203, 0x21f3d99c + .long 0xcec3662e, 0xa60ce07b + .long 0x96c515bb, 0x8f158014 + .long 0xe6fc4e6a, 0xce7f39f4 + .long 0x8227bb8a, 0xa00457f7 + .long 0xb0cd4768, 0x61d82e56 + .long 0x39c7ff35, 0x8d6d2c43 + .long 0xd7a4825c, 0xd270f1a2 + .long 0x0ab3844b, 0x00ac29cf + .long 0x0167d312, 0xc619809d + .long 0xf6076544, 0xe9adf796 + .long 0x26f6a60a, 0x2b3cac5d + .long 0xa741c1bf, 0x96638b34 + .long 0x98d8d9cb, 0x65863b64 + .long 0x49c3cc9c, 0xe0e9f351 + .long 0x68bce87a, 0x1b03397f + .long 0x57a3d037, 0x9af01f2d + .long 0x6956fc3b, 0xebb883bd + .long 0x42d98888, 0x2cff42cf + .long 0x3771e98f, 0xb3e32c28 + .long 0xb42ae3d9, 0x88f25a3a + .long 0x2178513a, 0x064f7f26 + .long 0xe0ac139e, 0x4e36f0b0 + .long 0x170076fa, 0xdd7e3b0c + .long 0x444dd413, 0xbd6f81f8 + .long 0x6f345e45, 0xf285651c + .long 0x41d17b64, 0x91c9bd4b + .long 0xff0dba97, 0x10746f3c + .long 0xa2b73df1, 0x885f087b + .long 0xf872e54c, 0xc7a68855 + .long 0x1e41e9fc, 0x4c144932 + .long 0x86d8e4d2, 0x271d9844 + .long 0x651bd98b, 0x52148f02 + .long 0x5bb8f1bc, 0x8e766a0c + .long 0xa90fd27a, 0xa3c6f37a + .long 0xb3af077a, 0x93a5f730 + .long 0x4984d782, 0xd7c0557f + .long 0xca6ef3ac, 0x6cb08e5c + .long 0x234e0b26, 0x63ded06a + .long 0xdd66cbbb, 0x6b749fb2 + .long 0x4597456a, 0x4d56973c + .long 0xe9e28eb4, 0x1393e203 + .long 0x7b3ff57a, 0x9669c9df + .long 0xc9c8b782, 0xcec3662e + .long 0x3f70cc6f, 0xe417f38a + .long 0x93e106a4, 0x96c515bb + .long 0x62ec6c6d, 0x4b9e0f71 + .long 0xd813b325, 0xe6fc4e6a + .long 0x0df04680, 0xd104b8fc + .long 0x2342001e, 0x8227bb8a + .long 0x0a2a8d7e, 0x5b397730 + .long 0x6d9a4957, 0xb0cd4768 + .long 0xe8b6368b, 0xe78eb416 + .long 0xd2c3ed1a, 0x39c7ff35 + .long 0x995a5724, 0x61ff0e01 + .long 0x9ef68d35, 0xd7a4825c + .long 0x0c139b31, 0x8d96551c + .long 0xf2271e60, 0x0ab3844b + .long 0x0b0bf8ca, 0x0bf80dd2 + .long 0x2664fd8b, 0x0167d312 + .long 0xed64812d, 0x8821abed + .long 0x02ee03b2, 0xf6076544 + .long 0x8604ae0f, 0x6a45d2b2 + .long 0x363bd6b3, 0x26f6a60a + .long 0x135c83fd, 0xd8d26619 + .long 0x5fabe670, 0xa741c1bf + .long 0x35ec3279, 0xde87806c + .long 0x00bcf5f6, 0x98d8d9cb + .long 0x8ae00689, 0x14338754 + .long 0x17f27698, 0x49c3cc9c + .long 0x58ca5f00, 0x5bd2011f + .long 0xaa7c7ad5, 0x68bce87a + .long 0xb5cfca28, 0xdd07448e + .long 0xded288f8, 0x57a3d037 + .long 0x59f229bc, 0xdde8f5b9 + .long 0x6d390dec, 0x6956fc3b + .long 0x37170390, 0xa3e3e02c + .long 0x6353c1cc, 0x42d98888 + .long 0xc4584f5c, 0xd73c7bea + .long 0xf48642e9, 0x3771e98f + .long 0x531377e2, 0x80ff0093 + .long 0xdd35bc8d, 0xb42ae3d9 + .long 0xb25b29f2, 0x8fe4c34d + .long 0x9a5ede41, 0x2178513a + .long 0xa563905d, 0xdf99fc11 + .long 0x45cddf4e, 0xe0ac139e + .long 0xacfa3103, 0x6c23e841 + .long 0xa51b6135, 0x170076fa diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S new file mode 100644 index 000000000..721474abf --- /dev/null +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -0,0 +1,333 @@ +######################################################################## +# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +# +# Copyright (c) 2013, Intel Corporation +# +# Authors: +# Erdinc Ozturk +# Vinodh Gopal +# James Guilford +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Reference paper titled "Fast CRC Computation for Generic +# Polynomials Using PCLMULQDQ Instruction" +# URL: http://www.intel.com/content/dam/www/public/us/en/documents +# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +# + +#include + +.text + +#define init_crc %edi +#define buf %rsi +#define len %rdx + +#define FOLD_CONSTS %xmm10 +#define BSWAP_MASK %xmm11 + +# Fold reg1, reg2 into the next 32 data bytes, storing the result back into +# reg1, reg2. +.macro fold_32_bytes offset, reg1, reg2 + movdqu \offset(buf), %xmm9 + movdqu \offset+16(buf), %xmm12 + pshufb BSWAP_MASK, %xmm9 + pshufb BSWAP_MASK, %xmm12 + movdqa \reg1, %xmm8 + movdqa \reg2, %xmm13 + pclmulqdq $0x00, FOLD_CONSTS, \reg1 + pclmulqdq $0x11, FOLD_CONSTS, %xmm8 + pclmulqdq $0x00, FOLD_CONSTS, \reg2 + pclmulqdq $0x11, FOLD_CONSTS, %xmm13 + pxor %xmm9 , \reg1 + xorps %xmm8 , \reg1 + pxor %xmm12, \reg2 + xorps %xmm13, \reg2 +.endm + +# Fold src_reg into dst_reg. +.macro fold_16_bytes src_reg, dst_reg + movdqa \src_reg, %xmm8 + pclmulqdq $0x11, FOLD_CONSTS, \src_reg + pclmulqdq $0x00, FOLD_CONSTS, %xmm8 + pxor %xmm8, \dst_reg + xorps \src_reg, \dst_reg +.endm + +# +# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len); +# +# Assumes len >= 16. +# +.align 16 +SYM_FUNC_START(crc_t10dif_pcl) + + movdqa .Lbswap_mask(%rip), BSWAP_MASK + + # For sizes less than 256 bytes, we can't fold 128 bytes at a time. + cmp $256, len + jl .Lless_than_256_bytes + + # Load the first 128 data bytes. Byte swapping is necessary to make the + # bit order match the polynomial coefficient order. + movdqu 16*0(buf), %xmm0 + movdqu 16*1(buf), %xmm1 + movdqu 16*2(buf), %xmm2 + movdqu 16*3(buf), %xmm3 + movdqu 16*4(buf), %xmm4 + movdqu 16*5(buf), %xmm5 + movdqu 16*6(buf), %xmm6 + movdqu 16*7(buf), %xmm7 + add $128, buf + pshufb BSWAP_MASK, %xmm0 + pshufb BSWAP_MASK, %xmm1 + pshufb BSWAP_MASK, %xmm2 + pshufb BSWAP_MASK, %xmm3 + pshufb BSWAP_MASK, %xmm4 + pshufb BSWAP_MASK, %xmm5 + pshufb BSWAP_MASK, %xmm6 + pshufb BSWAP_MASK, %xmm7 + + # XOR the first 16 data *bits* with the initial CRC value. + pxor %xmm8, %xmm8 + pinsrw $7, init_crc, %xmm8 + pxor %xmm8, %xmm0 + + movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS + + # Subtract 128 for the 128 data bytes just consumed. Subtract another + # 128 to simplify the termination condition of the following loop. + sub $256, len + + # While >= 128 data bytes remain (not counting xmm0-7), fold the 128 + # bytes xmm0-7 into them, storing the result back into xmm0-7. +.Lfold_128_bytes_loop: + fold_32_bytes 0, %xmm0, %xmm1 + fold_32_bytes 32, %xmm2, %xmm3 + fold_32_bytes 64, %xmm4, %xmm5 + fold_32_bytes 96, %xmm6, %xmm7 + add $128, buf + sub $128, len + jge .Lfold_128_bytes_loop + + # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7. + + # Fold across 64 bytes. + movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS + fold_16_bytes %xmm0, %xmm4 + fold_16_bytes %xmm1, %xmm5 + fold_16_bytes %xmm2, %xmm6 + fold_16_bytes %xmm3, %xmm7 + # Fold across 32 bytes. + movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS + fold_16_bytes %xmm4, %xmm6 + fold_16_bytes %xmm5, %xmm7 + # Fold across 16 bytes. + movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS + fold_16_bytes %xmm6, %xmm7 + + # Add 128 to get the correct number of data bytes remaining in 0...127 + # (not counting xmm7), following the previous extra subtraction by 128. + # Then subtract 16 to simplify the termination condition of the + # following loop. + add $128-16, len + + # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes + # xmm7 into them, storing the result back into xmm7. + jl .Lfold_16_bytes_loop_done +.Lfold_16_bytes_loop: + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, FOLD_CONSTS, %xmm7 + pclmulqdq $0x00, FOLD_CONSTS, %xmm8 + pxor %xmm8, %xmm7 + movdqu (buf), %xmm0 + pshufb BSWAP_MASK, %xmm0 + pxor %xmm0 , %xmm7 + add $16, buf + sub $16, len + jge .Lfold_16_bytes_loop + +.Lfold_16_bytes_loop_done: + # Add 16 to get the correct number of data bytes remaining in 0...15 + # (not counting xmm7), following the previous extra subtraction by 16. + add $16, len + je .Lreduce_final_16_bytes + +.Lhandle_partial_segment: + # Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16 + # bytes are in xmm7 and the rest are the remaining data in 'buf'. To do + # this without needing a fold constant for each possible 'len', redivide + # the bytes into a first chunk of 'len' bytes and a second chunk of 16 + # bytes, then fold the first chunk into the second. + + movdqa %xmm7, %xmm2 + + # xmm1 = last 16 original data bytes + movdqu -16(buf, len), %xmm1 + pshufb BSWAP_MASK, %xmm1 + + # xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes. + lea .Lbyteshift_table+16(%rip), %rax + sub len, %rax + movdqu (%rax), %xmm0 + pshufb %xmm0, %xmm2 + + # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes. + pxor .Lmask1(%rip), %xmm0 + pshufb %xmm0, %xmm7 + + # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes), + # then '16-len' bytes from xmm2 (high-order bytes). + pblendvb %xmm2, %xmm1 #xmm0 is implicit + + # Fold the first chunk into the second chunk, storing the result in xmm7. + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, FOLD_CONSTS, %xmm7 + pclmulqdq $0x00, FOLD_CONSTS, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm1, %xmm7 + +.Lreduce_final_16_bytes: + # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC + + # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. + movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS + + # Fold the high 64 bits into the low 64 bits, while also multiplying by + # x^64. This produces a 128-bit value congruent to x^64 * M(x) and + # whose low 48 bits are 0. + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x)) + pslldq $8, %xmm0 + pxor %xmm0, %xmm7 # + low bits * x^64 + + # Fold the high 32 bits into the low 96 bits. This produces a 96-bit + # value congruent to x^64 * M(x) and whose low 48 bits are 0. + movdqa %xmm7, %xmm0 + pand .Lmask2(%rip), %xmm0 # zero high 32 bits + psrldq $12, %xmm7 # extract high 32 bits + pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x)) + pxor %xmm0, %xmm7 # + low bits + + # Load G(x) and floor(x^48 / G(x)). + movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS + + # Use Barrett reduction to compute the final CRC value. + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x)) + psrlq $32, %xmm7 # /= x^32 + pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x) + psrlq $48, %xmm0 + pxor %xmm7, %xmm0 # + low 16 nonzero bits + # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. + + pextrw $0, %xmm0, %eax + RET + +.align 16 +.Lless_than_256_bytes: + # Checksumming a buffer of length 16...255 bytes + + # Load the first 16 data bytes. + movdqu (buf), %xmm7 + pshufb BSWAP_MASK, %xmm7 + add $16, buf + + # XOR the first 16 data *bits* with the initial CRC value. + pxor %xmm0, %xmm0 + pinsrw $7, init_crc, %xmm0 + pxor %xmm0, %xmm7 + + movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS + cmp $16, len + je .Lreduce_final_16_bytes # len == 16 + sub $32, len + jge .Lfold_16_bytes_loop # 32 <= len <= 255 + add $16, len + jmp .Lhandle_partial_segment # 17 <= len <= 31 +SYM_FUNC_END(crc_t10dif_pcl) + +.section .rodata, "a", @progbits +.align 16 + +# Fold constants precomputed from the polynomial 0x18bb7 +# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 +.Lfold_across_128_bytes_consts: + .quad 0x0000000000006123 # x^(8*128) mod G(x) + .quad 0x0000000000002295 # x^(8*128+64) mod G(x) +.Lfold_across_64_bytes_consts: + .quad 0x0000000000001069 # x^(4*128) mod G(x) + .quad 0x000000000000dd31 # x^(4*128+64) mod G(x) +.Lfold_across_32_bytes_consts: + .quad 0x000000000000857d # x^(2*128) mod G(x) + .quad 0x0000000000007acc # x^(2*128+64) mod G(x) +.Lfold_across_16_bytes_consts: + .quad 0x000000000000a010 # x^(1*128) mod G(x) + .quad 0x0000000000001faa # x^(1*128+64) mod G(x) +.Lfinal_fold_consts: + .quad 0x1368000000000000 # x^48 * (x^48 mod G(x)) + .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x)) +.Lbarrett_reduction_consts: + .quad 0x0000000000018bb7 # G(x) + .quad 0x00000001f65a57f8 # floor(x^48 / G(x)) + +.section .rodata.cst16.mask1, "aM", @progbits, 16 +.align 16 +.Lmask1: + .octa 0x80808080808080808080808080808080 + +.section .rodata.cst16.mask2, "aM", @progbits, 16 +.align 16 +.Lmask2: + .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF + +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090A0B0C0D0E0F + +.section .rodata.cst32.byteshift_table, "aM", @progbits, 32 +.align 16 +# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len] +# is the index vector to shift left by 'len' bytes, and is also {0x80, ..., +# 0x80} XOR the index vector to shift right by '16 - len' bytes. +.Lbyteshift_table: + .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 + .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c new file mode 100644 index 000000000..71291d5af --- /dev/null +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -0,0 +1,143 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions + * + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); + +struct chksum_desc_ctx { + __u16 crc; +}; + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + if (length >= 16 && crypto_simd_usable()) { + kernel_fpu_begin(); + ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); + kernel_fpu_end(); + } else + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) +{ + if (len >= 16 && crypto_simd_usable()) { + kernel_fpu_begin(); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); + kernel_fpu_end(); + } else + *(__u16 *)out = crc_t10dif_generic(crc, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + return __chksum_finup(0, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-pclmul", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static const struct x86_cpu_id crct10dif_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); + +static int __init crct10dif_intel_mod_init(void) +{ + if (!x86_match_cpu(crct10dif_cpu_id)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crct10dif_intel_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_intel_mod_init); +module_exit(crct10dif_intel_mod_fini); + +MODULE_AUTHOR("Tim Chen "); +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-pclmul"); diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c new file mode 100644 index 000000000..d55fa9e9b --- /dev/null +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -0,0 +1,1724 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} + +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} + +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; + + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %k1, %k1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r"(f2), "=&r"(carry_r) + : "r"(out), "r"(f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); + + return carry_r; +} + +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes the field subtraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw subtraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Subtract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results: */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes the field multiplication of four-element f1 with value in f2 + * Requires f2 to be smaller than 2^17 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", + "memory", "cc"); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Transfer bit into CF flag */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Step 1: Compute all partial products */ + " movq 32(%0), %%rdx;" /* f[0] */ + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%0), %%rdx;" /* f[3] */ + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 f31; + u64 top_bit; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL +}; + +static void curve25519_ever64_base(u8 *out, const u8 *priv) +{ + u64 swap = 1; + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = &tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; + + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; + + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); + + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); + swap = swap ^ bit; + cswap2(swap, xz1, xz2); + swap = bit; + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); + ++j; + } + j = 0; + } + + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); + + memzero_explicit(tmp, sizeof(tmp)); +} + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); + +void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); +} +EXPORT_SYMBOL(curve25519_arch); + +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64_base(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); +} +EXPORT_SYMBOL(curve25519_base_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_generate_public_key(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (req->src) + return -EINVAL; + + curve25519_base_arch(buf, secret); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static int curve25519_compute_shared_secret(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (!req->src) + return -EINVAL; + + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + + curve25519_arch(buf, secret, public_key); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-x86", + .base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_generate_public_key, + .compute_shared_secret = curve25519_compute_shared_secret, + .max_size = curve25519_max_size, +}; + + +static int __init curve25519_mod_init(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_bmi2_adx); + else + return 0; + return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? + crypto_register_kpp(&curve25519_alg) : 0; +} + +static void __exit curve25519_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && + static_branch_likely(&curve25519_use_bmi2_adx)) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(curve25519_mod_init); +module_exit(curve25519_mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-x86"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S new file mode 100644 index 000000000..f4c760f4c --- /dev/null +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -0,0 +1,799 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher + * + * Copyright © 2014 Jussi Kivilinna + */ + +#include + +.file "des3_ede-asm_64.S" +.text + +#define s1 .L_s1 +#define s2 ((s1) + (64*8)) +#define s3 ((s2) + (64*8)) +#define s4 ((s3) + (64*8)) +#define s5 ((s4) + (64*8)) +#define s6 ((s5) + (64*8)) +#define s7 ((s6) + (64*8)) +#define s8 ((s7) + (64*8)) + +/* register macros */ +#define CTX %rdi + +#define RL0 %r8 +#define RL1 %r9 +#define RL2 %r10 + +#define RL0d %r8d +#define RL1d %r9d +#define RL2d %r10d + +#define RR0 %r11 +#define RR1 %r12 +#define RR2 %r13 + +#define RR0d %r11d +#define RR1d %r12d +#define RR2d %r13d + +#define RW0 %rax +#define RW1 %rbx +#define RW2 %rcx + +#define RW0d %eax +#define RW1d %ebx +#define RW2d %ecx + +#define RW0bl %al +#define RW1bl %bl +#define RW2bl %cl + +#define RW0bh %ah +#define RW1bh %bh +#define RW2bh %ch + +#define RT0 %r15 +#define RT1 %rsi +#define RT2 %r14 +#define RT3 %rdx + +#define RT0d %r15d +#define RT1d %esi +#define RT2d %r14d +#define RT3d %edx + +/*********************************************************************** + * 1-way 3DES + ***********************************************************************/ +#define do_permutation(a, b, offset, mask) \ + movl a, RT0d; \ + shrl $(offset), RT0d; \ + xorl b, RT0d; \ + andl $(mask), RT0d; \ + xorl RT0d, b; \ + shll $(offset), RT0d; \ + xorl RT0d, a; + +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation(left, right) \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + movl left##d, RW0d; \ + roll $1, right##d; \ + xorl right##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##d; \ + xorl RW0d, right##d; \ + roll $1, left##d; \ + expand_to_64bits(right, RT3); \ + expand_to_64bits(left, RT3); + +#define final_permutation(left, right) \ + compress_to_64bits(right); \ + compress_to_64bits(left); \ + movl right##d, RW0d; \ + rorl $1, left##d; \ + xorl left##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##d; \ + xorl RW0d, left##d; \ + rorl $1, right##d; \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); + +#define round1(n, from, to, load_next_key) \ + xorq from, RW0; \ + \ + movzbl RW0bl, RT0d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + shrq $16, RW0; \ + movq s8(, RT0, 8), RT0; \ + xorq s6(, RT1, 8), to; \ + movzbl RW0bl, RL1d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s4(, RT2, 8), RT0; \ + xorq s2(, RT3, 8), to; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + xorq s7(, RL1, 8), RT0; \ + xorq s5(, RT1, 8), to; \ + xorq s3(, RT2, 8), RT0; \ + load_next_key(n, RW0); \ + xorq RT0, to; \ + xorq s1(, RT3, 8), to; \ + +#define load_next_key(n, RWx) \ + movq (((n) + 1) * 8)(CTX), RWx; + +#define dummy2(a, b) /*_*/ + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + +SYM_FUNC_START(des3_ede_x86_64_crypt_blk) + /* input: + * %rdi: round keys, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + pushq %rsi; /* dst */ + + read_block(%rdx, RL0, RR0); + initial_permutation(RL0, RR0); + + movq (CTX), RW0; + + round1(0, RR0, RL0, load_next_key); + round1(1, RL0, RR0, load_next_key); + round1(2, RR0, RL0, load_next_key); + round1(3, RL0, RR0, load_next_key); + round1(4, RR0, RL0, load_next_key); + round1(5, RL0, RR0, load_next_key); + round1(6, RR0, RL0, load_next_key); + round1(7, RL0, RR0, load_next_key); + round1(8, RR0, RL0, load_next_key); + round1(9, RL0, RR0, load_next_key); + round1(10, RR0, RL0, load_next_key); + round1(11, RL0, RR0, load_next_key); + round1(12, RR0, RL0, load_next_key); + round1(13, RL0, RR0, load_next_key); + round1(14, RR0, RL0, load_next_key); + round1(15, RL0, RR0, load_next_key); + + round1(16+0, RL0, RR0, load_next_key); + round1(16+1, RR0, RL0, load_next_key); + round1(16+2, RL0, RR0, load_next_key); + round1(16+3, RR0, RL0, load_next_key); + round1(16+4, RL0, RR0, load_next_key); + round1(16+5, RR0, RL0, load_next_key); + round1(16+6, RL0, RR0, load_next_key); + round1(16+7, RR0, RL0, load_next_key); + round1(16+8, RL0, RR0, load_next_key); + round1(16+9, RR0, RL0, load_next_key); + round1(16+10, RL0, RR0, load_next_key); + round1(16+11, RR0, RL0, load_next_key); + round1(16+12, RL0, RR0, load_next_key); + round1(16+13, RR0, RL0, load_next_key); + round1(16+14, RL0, RR0, load_next_key); + round1(16+15, RR0, RL0, load_next_key); + + round1(32+0, RR0, RL0, load_next_key); + round1(32+1, RL0, RR0, load_next_key); + round1(32+2, RR0, RL0, load_next_key); + round1(32+3, RL0, RR0, load_next_key); + round1(32+4, RR0, RL0, load_next_key); + round1(32+5, RL0, RR0, load_next_key); + round1(32+6, RR0, RL0, load_next_key); + round1(32+7, RL0, RR0, load_next_key); + round1(32+8, RR0, RL0, load_next_key); + round1(32+9, RL0, RR0, load_next_key); + round1(32+10, RR0, RL0, load_next_key); + round1(32+11, RL0, RR0, load_next_key); + round1(32+12, RR0, RL0, load_next_key); + round1(32+13, RL0, RR0, load_next_key); + round1(32+14, RR0, RL0, load_next_key); + round1(32+15, RL0, RR0, dummy2); + + final_permutation(RR0, RL0); + + popq %rsi /* dst */ + write_block(%rsi, RR0, RL0); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + + RET; +SYM_FUNC_END(des3_ede_x86_64_crypt_blk) + +/*********************************************************************** + * 3-way 3DES + ***********************************************************************/ +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation3(left, right) \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + \ + movl left##0d, RW0d; \ + roll $1, right##0d; \ + xorl right##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##0d; \ + xorl RW0d, right##0d; \ + roll $1, left##0d; \ + expand_to_64bits(right##0, RT3); \ + expand_to_64bits(left##0, RT3); \ + movl left##1d, RW1d; \ + roll $1, right##1d; \ + xorl right##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, left##1d; \ + xorl RW1d, right##1d; \ + roll $1, left##1d; \ + expand_to_64bits(right##1, RT3); \ + expand_to_64bits(left##1, RT3); \ + movl left##2d, RW2d; \ + roll $1, right##2d; \ + xorl right##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, left##2d; \ + xorl RW2d, right##2d; \ + roll $1, left##2d; \ + expand_to_64bits(right##2, RT3); \ + expand_to_64bits(left##2, RT3); + +#define final_permutation3(left, right) \ + compress_to_64bits(right##0); \ + compress_to_64bits(left##0); \ + movl right##0d, RW0d; \ + rorl $1, left##0d; \ + xorl left##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##0d; \ + xorl RW0d, left##0d; \ + rorl $1, right##0d; \ + compress_to_64bits(right##1); \ + compress_to_64bits(left##1); \ + movl right##1d, RW1d; \ + rorl $1, left##1d; \ + xorl left##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, right##1d; \ + xorl RW1d, left##1d; \ + rorl $1, right##1d; \ + compress_to_64bits(right##2); \ + compress_to_64bits(left##2); \ + movl right##2d, RW2d; \ + rorl $1, left##2d; \ + xorl left##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, right##2d; \ + xorl RW2d, left##2d; \ + rorl $1, right##2d; \ + \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); + +#define round3(n, from, to, load_next_key, do_movq) \ + xorq from##0, RW0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s8(, RT3, 8), to##0; \ + xorq s6(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s4(, RT3, 8), to##0; \ + xorq s2(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s7(, RT3, 8), to##0; \ + xorq s5(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + load_next_key(n, RW0); \ + xorq s3(, RT3, 8), to##0; \ + xorq s1(, RT1, 8), to##0; \ + xorq from##1, RW1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s8(, RT3, 8), to##1; \ + xorq s6(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s4(, RT3, 8), to##1; \ + xorq s2(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrl $16, RW1d; \ + xorq s7(, RT3, 8), to##1; \ + xorq s5(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + do_movq(RW0, RW1); \ + xorq s3(, RT3, 8), to##1; \ + xorq s1(, RT1, 8), to##1; \ + xorq from##2, RW2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s8(, RT3, 8), to##2; \ + xorq s6(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s4(, RT3, 8), to##2; \ + xorq s2(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrl $16, RW2d; \ + xorq s7(, RT3, 8), to##2; \ + xorq s5(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + do_movq(RW0, RW2); \ + xorq s3(, RT3, 8), to##2; \ + xorq s1(, RT1, 8), to##2; + +#define __movq(src, dst) \ + movq src, dst; + +SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way) + /* input: + * %rdi: ctx, round keys + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + */ + + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + pushq %rsi /* dst */ + + /* load input */ + movl 0 * 4(%rdx), RL0d; + movl 1 * 4(%rdx), RR0d; + movl 2 * 4(%rdx), RL1d; + movl 3 * 4(%rdx), RR1d; + movl 4 * 4(%rdx), RL2d; + movl 5 * 4(%rdx), RR2d; + + bswapl RL0d; + bswapl RR0d; + bswapl RL1d; + bswapl RR1d; + bswapl RL2d; + bswapl RR2d; + + initial_permutation3(RL, RR); + + movq 0(CTX), RW0; + movq RW0, RW1; + movq RW0, RW2; + + round3(0, RR, RL, load_next_key, __movq); + round3(1, RL, RR, load_next_key, __movq); + round3(2, RR, RL, load_next_key, __movq); + round3(3, RL, RR, load_next_key, __movq); + round3(4, RR, RL, load_next_key, __movq); + round3(5, RL, RR, load_next_key, __movq); + round3(6, RR, RL, load_next_key, __movq); + round3(7, RL, RR, load_next_key, __movq); + round3(8, RR, RL, load_next_key, __movq); + round3(9, RL, RR, load_next_key, __movq); + round3(10, RR, RL, load_next_key, __movq); + round3(11, RL, RR, load_next_key, __movq); + round3(12, RR, RL, load_next_key, __movq); + round3(13, RL, RR, load_next_key, __movq); + round3(14, RR, RL, load_next_key, __movq); + round3(15, RL, RR, load_next_key, __movq); + + round3(16+0, RL, RR, load_next_key, __movq); + round3(16+1, RR, RL, load_next_key, __movq); + round3(16+2, RL, RR, load_next_key, __movq); + round3(16+3, RR, RL, load_next_key, __movq); + round3(16+4, RL, RR, load_next_key, __movq); + round3(16+5, RR, RL, load_next_key, __movq); + round3(16+6, RL, RR, load_next_key, __movq); + round3(16+7, RR, RL, load_next_key, __movq); + round3(16+8, RL, RR, load_next_key, __movq); + round3(16+9, RR, RL, load_next_key, __movq); + round3(16+10, RL, RR, load_next_key, __movq); + round3(16+11, RR, RL, load_next_key, __movq); + round3(16+12, RL, RR, load_next_key, __movq); + round3(16+13, RR, RL, load_next_key, __movq); + round3(16+14, RL, RR, load_next_key, __movq); + round3(16+15, RR, RL, load_next_key, __movq); + + round3(32+0, RR, RL, load_next_key, __movq); + round3(32+1, RL, RR, load_next_key, __movq); + round3(32+2, RR, RL, load_next_key, __movq); + round3(32+3, RL, RR, load_next_key, __movq); + round3(32+4, RR, RL, load_next_key, __movq); + round3(32+5, RL, RR, load_next_key, __movq); + round3(32+6, RR, RL, load_next_key, __movq); + round3(32+7, RL, RR, load_next_key, __movq); + round3(32+8, RR, RL, load_next_key, __movq); + round3(32+9, RL, RR, load_next_key, __movq); + round3(32+10, RR, RL, load_next_key, __movq); + round3(32+11, RL, RR, load_next_key, __movq); + round3(32+12, RR, RL, load_next_key, __movq); + round3(32+13, RL, RR, load_next_key, __movq); + round3(32+14, RR, RL, load_next_key, __movq); + round3(32+15, RL, RR, dummy2, dummy2); + + final_permutation3(RR, RL); + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + popq %rsi /* dst */ + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + + RET; +SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way) + +.section .rodata, "a", @progbits +.align 16 +.L_s1: + .quad 0x0010100001010400, 0x0000000000000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0010100001010004, 0x0000100000010404 + .quad 0x0000000000000004, 0x0000100000010000 + .quad 0x0000000000000400, 0x0010100001010400 + .quad 0x0010100001010404, 0x0000000000000400 + .quad 0x0010000001000404, 0x0010100001010004 + .quad 0x0010000001000000, 0x0000000000000004 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000100000010400 + .quad 0x0000100000010400, 0x0010100001010000 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0000100000010004, 0x0010000001000004 + .quad 0x0010000001000004, 0x0000100000010004 + .quad 0x0000000000000000, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010000001000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0000000000000004, 0x0010100001010000 + .quad 0x0010100001010400, 0x0010000001000000 + .quad 0x0010000001000000, 0x0000000000000400 + .quad 0x0010100001010004, 0x0000100000010000 + .quad 0x0000100000010400, 0x0010000001000004 + .quad 0x0000000000000400, 0x0000000000000004 + .quad 0x0010000001000404, 0x0000100000010404 + .quad 0x0010100001010404, 0x0000100000010004 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0010000001000004, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010100001010400 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000000000000000 + .quad 0x0000100000010004, 0x0000100000010400 + .quad 0x0000000000000000, 0x0010100001010004 +.L_s2: + .quad 0x0801080200100020, 0x0800080000000000 + .quad 0x0000080000000000, 0x0001080200100020 + .quad 0x0001000000100000, 0x0000000200000020 + .quad 0x0801000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0801080200100020 + .quad 0x0801080000100000, 0x0800000000000000 + .quad 0x0800080000000000, 0x0001000000100000 + .quad 0x0000000200000020, 0x0801000200100020 + .quad 0x0001080000100000, 0x0001000200100020 + .quad 0x0800080200000020, 0x0000000000000000 + .quad 0x0800000000000000, 0x0000080000000000 + .quad 0x0001080200100020, 0x0801000000100000 + .quad 0x0001000200100020, 0x0800000200000020 + .quad 0x0000000000000000, 0x0001080000100000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0801000000100000, 0x0000080200000020 + .quad 0x0000000000000000, 0x0001080200100020 + .quad 0x0801000200100020, 0x0001000000100000 + .quad 0x0800080200000020, 0x0801000000100000 + .quad 0x0801080000100000, 0x0000080000000000 + .quad 0x0801000000100000, 0x0800080000000000 + .quad 0x0000000200000020, 0x0801080200100020 + .quad 0x0001080200100020, 0x0000000200000020 + .quad 0x0000080000000000, 0x0800000000000000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0001000000100000, 0x0800000200000020 + .quad 0x0001000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0001000200100020 + .quad 0x0001080000100000, 0x0000000000000000 + .quad 0x0800080000000000, 0x0000080200000020 + .quad 0x0800000000000000, 0x0801000200100020 + .quad 0x0801080200100020, 0x0001080000100000 +.L_s3: + .quad 0x0000002000000208, 0x0000202008020200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000202000020208, 0x0000002008000200 + .quad 0x0000200000020008, 0x0000000008000008 + .quad 0x0000000008000008, 0x0000200000020000 + .quad 0x0000202008020208, 0x0000200000020008 + .quad 0x0000200008020000, 0x0000002000000208 + .quad 0x0000000008000000, 0x0000000000000008 + .quad 0x0000202008020200, 0x0000002000000200 + .quad 0x0000202000020200, 0x0000200008020000 + .quad 0x0000200008020008, 0x0000202000020208 + .quad 0x0000002008000208, 0x0000202000020200 + .quad 0x0000200000020000, 0x0000002008000208 + .quad 0x0000000000000008, 0x0000202008020208 + .quad 0x0000002000000200, 0x0000000008000000 + .quad 0x0000202008020200, 0x0000000008000000 + .quad 0x0000200000020008, 0x0000002000000208 + .quad 0x0000200000020000, 0x0000202008020200 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000002000000200, 0x0000200000020008 + .quad 0x0000202008020208, 0x0000002008000200 + .quad 0x0000000008000008, 0x0000002000000200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000208, 0x0000200000020000 + .quad 0x0000000008000000, 0x0000202008020208 + .quad 0x0000000000000008, 0x0000202000020208 + .quad 0x0000202000020200, 0x0000000008000008 + .quad 0x0000200008020000, 0x0000002008000208 + .quad 0x0000002000000208, 0x0000200008020000 + .quad 0x0000202000020208, 0x0000000000000008 + .quad 0x0000200008020008, 0x0000202000020200 +.L_s4: + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x0000020000002000, 0x0008020800002000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x0000020000002000, 0x0008020800002000 +.L_s5: + .quad 0x0000001000000100, 0x0020001002080100 + .quad 0x0020000002080000, 0x0420001002000100 + .quad 0x0000000000080000, 0x0000001000000100 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0400001000080100, 0x0000000000080000 + .quad 0x0020001002000100, 0x0400001000080100 + .quad 0x0420001002000100, 0x0420000002080000 + .quad 0x0000001000080100, 0x0400000000000000 + .quad 0x0020000002000000, 0x0400000000080000 + .quad 0x0400000000080000, 0x0000000000000000 + .quad 0x0400001000000100, 0x0420001002080100 + .quad 0x0420001002080100, 0x0020001002000100 + .quad 0x0420000002080000, 0x0400001000000100 + .quad 0x0000000000000000, 0x0420000002000000 + .quad 0x0020001002080100, 0x0020000002000000 + .quad 0x0420000002000000, 0x0000001000080100 + .quad 0x0000000000080000, 0x0420001002000100 + .quad 0x0000001000000100, 0x0020000002000000 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0420001002000100, 0x0400001000080100 + .quad 0x0020001002000100, 0x0400000000000000 + .quad 0x0420000002080000, 0x0020001002080100 + .quad 0x0400001000080100, 0x0000001000000100 + .quad 0x0020000002000000, 0x0420000002080000 + .quad 0x0420001002080100, 0x0000001000080100 + .quad 0x0420000002000000, 0x0420001002080100 + .quad 0x0020000002080000, 0x0000000000000000 + .quad 0x0400000000080000, 0x0420000002000000 + .quad 0x0000001000080100, 0x0020001002000100 + .quad 0x0400001000000100, 0x0000000000080000 + .quad 0x0000000000000000, 0x0400000000080000 + .quad 0x0020001002080100, 0x0400001000000100 +.L_s6: + .quad 0x0200000120000010, 0x0204000020000000 + .quad 0x0000040000000000, 0x0204040120000010 + .quad 0x0204000020000000, 0x0000000100000010 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0200040020000000, 0x0004040100000010 + .quad 0x0004000000000000, 0x0200000120000010 + .quad 0x0004000100000010, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0000000000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000040000000000 + .quad 0x0004040000000000, 0x0200040120000010 + .quad 0x0000000100000010, 0x0204000120000010 + .quad 0x0204000120000010, 0x0000000000000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000040100000010, 0x0004040000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0200040020000000, 0x0000000100000010 + .quad 0x0204000120000010, 0x0004040000000000 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0000040100000010, 0x0200000120000010 + .quad 0x0004000000000000, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0200000120000010, 0x0204040120000010 + .quad 0x0004040000000000, 0x0204000020000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000000000000000, 0x0204000120000010 + .quad 0x0000000100000010, 0x0000040000000000 + .quad 0x0204000020000000, 0x0004040100000010 + .quad 0x0000040000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000000000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0004000100000010, 0x0200040120000010 +.L_s7: + .quad 0x0002000000200000, 0x2002000004200002 + .quad 0x2000000004000802, 0x0000000000000000 + .quad 0x0000000000000800, 0x2000000004000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2002000004200802, 0x0002000000200000 + .quad 0x0000000000000000, 0x2000000004000002 + .quad 0x2000000000000002, 0x0000000004000000 + .quad 0x2002000004200002, 0x2000000000000802 + .quad 0x0000000004000800, 0x2002000000200802 + .quad 0x2002000000200002, 0x0000000004000800 + .quad 0x2000000004000002, 0x0002000004200000 + .quad 0x0002000004200800, 0x2002000000200002 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000000000802, 0x2002000004200802 + .quad 0x0002000000200800, 0x2000000000000002 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0002000000200000, 0x2000000004000802 + .quad 0x2000000004000802, 0x2002000004200002 + .quad 0x2002000004200002, 0x2000000000000002 + .quad 0x2002000000200002, 0x0000000004000000 + .quad 0x0000000004000800, 0x0002000000200000 + .quad 0x0002000004200800, 0x2000000000000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2000000000000802, 0x2000000004000002 + .quad 0x2002000004200802, 0x0002000004200000 + .quad 0x0002000000200800, 0x0000000000000000 + .quad 0x2000000000000002, 0x2002000004200802 + .quad 0x0000000000000000, 0x2002000000200802 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000004000002, 0x0000000004000800 + .quad 0x0000000000000800, 0x2002000000200002 +.L_s8: + .quad 0x0100010410001000, 0x0000010000001000 + .quad 0x0000000000040000, 0x0100010410041000 + .quad 0x0100000010000000, 0x0100010410001000 + .quad 0x0000000400000000, 0x0100000010000000 + .quad 0x0000000400040000, 0x0100000010040000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0100010010041000, 0x0000010400041000 + .quad 0x0000010000001000, 0x0000000400000000 + .quad 0x0100000010040000, 0x0100000410000000 + .quad 0x0100010010001000, 0x0000010400001000 + .quad 0x0000010000041000, 0x0000000400040000 + .quad 0x0100000410040000, 0x0100010010041000 + .quad 0x0000010400001000, 0x0000000000000000 + .quad 0x0000000000000000, 0x0100000410040000 + .quad 0x0100000410000000, 0x0100010010001000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0100010010041000, 0x0000010000001000 + .quad 0x0000000400000000, 0x0100000410040000 + .quad 0x0000010000001000, 0x0000010400041000 + .quad 0x0100010010001000, 0x0000000400000000 + .quad 0x0100000410000000, 0x0100000010040000 + .quad 0x0100000410040000, 0x0100000010000000 + .quad 0x0000000000040000, 0x0100010410001000 + .quad 0x0000000000000000, 0x0100010410041000 + .quad 0x0000000400040000, 0x0100000410000000 + .quad 0x0100000010040000, 0x0100010010001000 + .quad 0x0100010410001000, 0x0000000000000000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0000010000041000, 0x0000010400001000 + .quad 0x0000010400001000, 0x0000000400040000 + .quad 0x0100000010000000, 0x0100010010041000 diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c new file mode 100644 index 000000000..abb8b1fe1 --- /dev/null +++ b/arch/x86/crypto/des3_ede_glue.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for assembler optimized version of 3DES + * + * Copyright © 2014 Jussi Kivilinna + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + */ + +#include +#include +#include +#include +#include +#include +#include + +struct des3_ede_x86_ctx { + struct des3_ede_ctx enc; + struct des3_ede_ctx dec; +}; + +/* regular block cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, + const u8 *src); + +static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *enc_ctx = ctx->enc.expkey; + + des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); +} + +static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec.expkey; + + des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); +} + +static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec.expkey; + + des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); +} + +static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static int ecb_crypt(struct skcipher_request *req, const u32 *expkey) +{ + const unsigned int bsize = DES3_EDE_BLOCK_SIZE; + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 3) { + do { + des3_ede_x86_64_crypt_blk_3way(expkey, wdst, + wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_crypt(req, ctx->enc.expkey); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_crypt(req, ctx->dec.expkey); +} + +static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) +{ + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) +{ + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[3 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * 3 - bsize; + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + } + + /* Handle leftovers */ + for (;;) { + des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm); + u32 i, j, tmp; + int err; + + err = des3_ede_expand_key(&ctx->enc, key, keylen); + if (err == -ENOKEY) { + if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) + err = -EINVAL; + else + err = 0; + } + + if (err) { + memset(ctx, 0, sizeof(*ctx)); + return err; + } + + /* Fix encryption context for this implementation and form decryption + * context. */ + j = DES3_EDE_EXPKEY_WORDS - 2; + for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { + tmp = ror32(ctx->enc.expkey[i + 1], 4); + ctx->enc.expkey[i + 1] = tmp; + + ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0]; + ctx->dec.expkey[j + 1] = tmp; + } + + return 0; +} + +static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, + unsigned int keylen) +{ + return des3_ede_x86_setkey(&tfm->base, key, keylen); +} + +static struct crypto_alg des3_ede_cipher = { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_x86_setkey, + .cia_encrypt = des3_ede_x86_encrypt, + .cia_decrypt = des3_ede_x86_decrypt, + } + } +}; + +static struct skcipher_alg des3_ede_skciphers[] = { + { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } +}; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, des3_ede-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init des3_ede_x86_init(void) +{ + int err; + + if (!force && is_blacklisted_cpu()) { + pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); + return -ENODEV; + } + + err = crypto_register_alg(&des3_ede_cipher); + if (err) + return err; + + err = crypto_register_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); + if (err) + crypto_unregister_alg(&des3_ede_cipher); + + return err; +} + +static void __exit des3_ede_x86_fini(void) +{ + crypto_unregister_alg(&des3_ede_cipher); + crypto_unregister_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); +} + +module_init(des3_ede_x86_init); +module_exit(des3_ede_x86_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); +MODULE_ALIAS_CRYPTO("des3_ede"); +MODULE_ALIAS_CRYPTO("des3_ede-asm"); +MODULE_AUTHOR("Jussi Kivilinna "); diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h new file mode 100644 index 000000000..eaa15c7b2 --- /dev/null +++ b/arch/x86/crypto/ecb_cbc_helpers.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _CRYPTO_ECB_CBC_HELPER_H +#define _CRYPTO_ECB_CBC_HELPER_H + +#include +#include + +/* + * Mode helpers to instantiate parameterized skcipher ECB/CBC modes without + * having to rely on indirect calls and retpolines. + */ + +#define ECB_WALK_START(req, bsize, fpu_blocks) do { \ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \ + const int __bsize = (bsize); \ + struct skcipher_walk walk; \ + int err = skcipher_walk_virt(&walk, (req), false); \ + while (walk.nbytes > 0) { \ + unsigned int nbytes = walk.nbytes; \ + bool do_fpu = (fpu_blocks) != -1 && \ + nbytes >= (fpu_blocks) * __bsize; \ + const u8 *src = walk.src.virt.addr; \ + u8 *dst = walk.dst.virt.addr; \ + u8 __maybe_unused buf[(bsize)]; \ + if (do_fpu) kernel_fpu_begin() + +#define CBC_WALK_START(req, bsize, fpu_blocks) \ + ECB_WALK_START(req, bsize, fpu_blocks) + +#define ECB_WALK_ADVANCE(blocks) do { \ + dst += (blocks) * __bsize; \ + src += (blocks) * __bsize; \ + nbytes -= (blocks) * __bsize; \ +} while (0) + +#define ECB_BLOCK(blocks, func) do { \ + while (nbytes >= (blocks) * __bsize) { \ + (func)(ctx, dst, src); \ + ECB_WALK_ADVANCE(blocks); \ + } \ +} while (0) + +#define CBC_ENC_BLOCK(func) do { \ + const u8 *__iv = walk.iv; \ + while (nbytes >= __bsize) { \ + crypto_xor_cpy(dst, src, __iv, __bsize); \ + (func)(ctx, dst, dst); \ + __iv = dst; \ + ECB_WALK_ADVANCE(1); \ + } \ + memcpy(walk.iv, __iv, __bsize); \ +} while (0) + +#define CBC_DEC_BLOCK(blocks, func) do { \ + while (nbytes >= (blocks) * __bsize) { \ + const u8 *__iv = src + ((blocks) - 1) * __bsize; \ + if (dst == src) \ + __iv = memcpy(buf, __iv, __bsize); \ + (func)(ctx, dst, src); \ + crypto_xor(dst, walk.iv, __bsize); \ + memcpy(walk.iv, __iv, __bsize); \ + ECB_WALK_ADVANCE(blocks); \ + } \ +} while (0) + +#define ECB_WALK_END() \ + if (do_fpu) kernel_fpu_end(); \ + err = skcipher_walk_done(&walk, nbytes); \ + } \ + return err; \ +} while (0) + +#define CBC_WALK_END() ECB_WALK_END() + +#endif diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000..2bf871899 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + */ + +#include +#include + +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 + pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 + pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + RET +SYM_FUNC_END(__clmul_gf128mul_ble) + +/* void clmul_ghash_mul(char *dst, const u128 *shash) */ +SYM_FUNC_START(clmul_ghash_mul) + FRAME_BEGIN + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + pshufb BSWAP, DATA + call __clmul_gf128mul_ble + pshufb BSWAP, DATA + movups DATA, (%rdi) + FRAME_END + RET +SYM_FUNC_END(clmul_ghash_mul) + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const u128 *shash); + */ +SYM_FUNC_START(clmul_ghash_update) + FRAME_BEGIN + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + pshufb BSWAP, DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + pshufb BSWAP, IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + pshufb BSWAP, DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + FRAME_END + RET +SYM_FUNC_END(clmul_ghash_update) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000..c0ab0ff4a --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const u128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const u128 *shash); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + u128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) + return -EINVAL; + + /* perform multiplication by 'x' in GF(2^128) */ + a = get_unaligned_be64(key); + b = get_unaligned_be64(key + 8); + + ctx->shash.a = (b << 1) | (a >> 63); + ctx->shash.b = (a << 1) | (b >> 63); + + if (a >> 63) + ctx->shash.b ^= ((u64)0xc2) << 56; + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + return crypto_shash_init(desc); +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!crypto_simd_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!crypto_simd_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + ghash_async_init(req); + memcpy(dctx, in, sizeof(*dctx)); + return 0; + +} + +static int ghash_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memcpy(out, dctx, sizeof(*dctx)); + return 0; + +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!crypto_simd_usable() || + (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + return crypto_ahash_setkey(child, key, keylen); +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .export = ghash_async_export, + .import = ghash_async_import, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .statesize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_ctxsize = sizeof(struct ghash_async_ctx), + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static const struct x86_cpu_id pcmul_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */ + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!x86_match_cpu(pcmul_cpu_id)) + return -ENODEV; + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} + +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S new file mode 100644 index 000000000..3da385271 --- /dev/null +++ b/arch/x86/crypto/glue_helper-asm-avx.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Shared glue code for 128bit block ciphers, AVX assembler macros + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \ + vmovdqu (0*16)(src), x0; \ + vmovdqu (1*16)(src), x1; \ + vmovdqu (2*16)(src), x2; \ + vmovdqu (3*16)(src), x3; \ + vmovdqu (4*16)(src), x4; \ + vmovdqu (5*16)(src), x5; \ + vmovdqu (6*16)(src), x6; \ + vmovdqu (7*16)(src), x7; + +#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ + vmovdqu x0, (0*16)(dst); \ + vmovdqu x1, (1*16)(dst); \ + vmovdqu x2, (2*16)(dst); \ + vmovdqu x3, (3*16)(dst); \ + vmovdqu x4, (4*16)(dst); \ + vmovdqu x5, (5*16)(dst); \ + vmovdqu x6, (6*16)(dst); \ + vmovdqu x7, (7*16)(dst); + +#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ + vpxor (0*16)(src), x1, x1; \ + vpxor (1*16)(src), x2, x2; \ + vpxor (2*16)(src), x3, x3; \ + vpxor (3*16)(src), x4, x4; \ + vpxor (4*16)(src), x5, x5; \ + vpxor (5*16)(src), x6, x6; \ + vpxor (6*16)(src), x7, x7; \ + store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S new file mode 100644 index 000000000..c77e90494 --- /dev/null +++ b/arch/x86/crypto/glue_helper-asm-avx2.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Shared glue code for 128bit block ciphers, AVX2 assembler macros + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) \ + vmovdqu (0*32)(src), x0; \ + vmovdqu (1*32)(src), x1; \ + vmovdqu (2*32)(src), x2; \ + vmovdqu (3*32)(src), x3; \ + vmovdqu (4*32)(src), x4; \ + vmovdqu (5*32)(src), x5; \ + vmovdqu (6*32)(src), x6; \ + vmovdqu (7*32)(src), x7; + +#define store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ + vmovdqu x0, (0*32)(dst); \ + vmovdqu x1, (1*32)(dst); \ + vmovdqu x2, (2*32)(dst); \ + vmovdqu x3, (3*32)(dst); \ + vmovdqu x4, (4*32)(dst); \ + vmovdqu x5, (5*32)(dst); \ + vmovdqu x6, (6*32)(dst); \ + vmovdqu x7, (7*32)(dst); + +#define store_cbc_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7, t0) \ + vpxor t0, t0, t0; \ + vinserti128 $1, (src), t0, t0; \ + vpxor t0, x0, x0; \ + vpxor (0*32+16)(src), x1, x1; \ + vpxor (1*32+16)(src), x2, x2; \ + vpxor (2*32+16)(src), x3, x3; \ + vpxor (3*32+16)(src), x4, x4; \ + vpxor (4*32+16)(src), x5, x5; \ + vpxor (5*32+16)(src), x6, x6; \ + vpxor (6*32+16)(src), x7, x7; \ + store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S new file mode 100644 index 000000000..6a0b15e71 --- /dev/null +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, x86_64 AVX2 accelerated + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + +#define PASS0_SUMS %ymm0 +#define PASS1_SUMS %ymm1 +#define PASS2_SUMS %ymm2 +#define PASS3_SUMS %ymm3 +#define K0 %ymm4 +#define K0_XMM %xmm4 +#define K1 %ymm5 +#define K1_XMM %xmm5 +#define K2 %ymm6 +#define K2_XMM %xmm6 +#define K3 %ymm7 +#define K3_XMM %xmm7 +#define T0 %ymm8 +#define T1 %ymm9 +#define T2 %ymm10 +#define T2_XMM %xmm10 +#define T3 %ymm11 +#define T3_XMM %xmm11 +#define T4 %ymm12 +#define T5 %ymm13 +#define T6 %ymm14 +#define T7 %ymm15 +#define KEY %rdi +#define MESSAGE %rsi +#define MESSAGE_LEN %rdx +#define HASH %rcx + +.macro _nh_2xstride k0, k1, k2, k3 + + // Add message words to key words + vpaddd \k0, T3, T0 + vpaddd \k1, T3, T1 + vpaddd \k2, T3, T2 + vpaddd \k3, T3, T3 + + // Multiply 32x32 => 64 and accumulate + vpshufd $0x10, T0, T4 + vpshufd $0x32, T0, T0 + vpshufd $0x10, T1, T5 + vpshufd $0x32, T1, T1 + vpshufd $0x10, T2, T6 + vpshufd $0x32, T2, T2 + vpshufd $0x10, T3, T7 + vpshufd $0x32, T3, T3 + vpmuludq T4, T0, T0 + vpmuludq T5, T1, T1 + vpmuludq T6, T2, T2 + vpmuludq T7, T3, T3 + vpaddq T0, PASS0_SUMS, PASS0_SUMS + vpaddq T1, PASS1_SUMS, PASS1_SUMS + vpaddq T2, PASS2_SUMS, PASS2_SUMS + vpaddq T3, PASS3_SUMS, PASS3_SUMS +.endm + +/* + * void nh_avx2(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +SYM_FUNC_START(nh_avx2) + + vmovdqu 0x00(KEY), K0 + vmovdqu 0x10(KEY), K1 + add $0x20, KEY + vpxor PASS0_SUMS, PASS0_SUMS, PASS0_SUMS + vpxor PASS1_SUMS, PASS1_SUMS, PASS1_SUMS + vpxor PASS2_SUMS, PASS2_SUMS, PASS2_SUMS + vpxor PASS3_SUMS, PASS3_SUMS, PASS3_SUMS + + sub $0x40, MESSAGE_LEN + jl .Lloop4_done +.Lloop4: + vmovdqu (MESSAGE), T3 + vmovdqu 0x00(KEY), K2 + vmovdqu 0x10(KEY), K3 + _nh_2xstride K0, K1, K2, K3 + + vmovdqu 0x20(MESSAGE), T3 + vmovdqu 0x20(KEY), K0 + vmovdqu 0x30(KEY), K1 + _nh_2xstride K2, K3, K0, K1 + + add $0x40, MESSAGE + add $0x40, KEY + sub $0x40, MESSAGE_LEN + jge .Lloop4 + +.Lloop4_done: + and $0x3f, MESSAGE_LEN + jz .Ldone + + cmp $0x20, MESSAGE_LEN + jl .Llast + + // 2 or 3 strides remain; do 2 more. + vmovdqu (MESSAGE), T3 + vmovdqu 0x00(KEY), K2 + vmovdqu 0x10(KEY), K3 + _nh_2xstride K0, K1, K2, K3 + add $0x20, MESSAGE + add $0x20, KEY + sub $0x20, MESSAGE_LEN + jz .Ldone + vmovdqa K2, K0 + vmovdqa K3, K1 +.Llast: + // Last stride. Zero the high 128 bits of the message and keys so they + // don't affect the result when processing them like 2 strides. + vmovdqu (MESSAGE), T3_XMM + vmovdqa K0_XMM, K0_XMM + vmovdqa K1_XMM, K1_XMM + vmovdqu 0x00(KEY), K2_XMM + vmovdqu 0x10(KEY), K3_XMM + _nh_2xstride K0, K1, K2, K3 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + + // PASS0_SUMS is (0A 0B 0C 0D) + // PASS1_SUMS is (1A 1B 1C 1D) + // PASS2_SUMS is (2A 2B 2C 2D) + // PASS3_SUMS is (3A 3B 3C 3D) + // We need the horizontal sums: + // (0A + 0B + 0C + 0D, + // 1A + 1B + 1C + 1D, + // 2A + 2B + 2C + 2D, + // 3A + 3B + 3C + 3D) + // + + vpunpcklqdq PASS1_SUMS, PASS0_SUMS, T0 // T0 = (0A 1A 0C 1C) + vpunpckhqdq PASS1_SUMS, PASS0_SUMS, T1 // T1 = (0B 1B 0D 1D) + vpunpcklqdq PASS3_SUMS, PASS2_SUMS, T2 // T2 = (2A 3A 2C 3C) + vpunpckhqdq PASS3_SUMS, PASS2_SUMS, T3 // T3 = (2B 3B 2D 3D) + + vinserti128 $0x1, T2_XMM, T0, T4 // T4 = (0A 1A 2A 3A) + vinserti128 $0x1, T3_XMM, T1, T5 // T5 = (0B 1B 2B 3B) + vperm2i128 $0x31, T2, T0, T0 // T0 = (0C 1C 2C 3C) + vperm2i128 $0x31, T3, T1, T1 // T1 = (0D 1D 2D 3D) + + vpaddq T5, T4, T4 + vpaddq T1, T0, T0 + vpaddq T4, T0, T0 + vmovdqu T0, (HASH) + RET +SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S new file mode 100644 index 000000000..34c567bbc --- /dev/null +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + +#define PASS0_SUMS %xmm0 +#define PASS1_SUMS %xmm1 +#define PASS2_SUMS %xmm2 +#define PASS3_SUMS %xmm3 +#define K0 %xmm4 +#define K1 %xmm5 +#define K2 %xmm6 +#define K3 %xmm7 +#define T0 %xmm8 +#define T1 %xmm9 +#define T2 %xmm10 +#define T3 %xmm11 +#define T4 %xmm12 +#define T5 %xmm13 +#define T6 %xmm14 +#define T7 %xmm15 +#define KEY %rdi +#define MESSAGE %rsi +#define MESSAGE_LEN %rdx +#define HASH %rcx + +.macro _nh_stride k0, k1, k2, k3, offset + + // Load next message stride + movdqu \offset(MESSAGE), T1 + + // Load next key stride + movdqu \offset(KEY), \k3 + + // Add message words to key words + movdqa T1, T2 + movdqa T1, T3 + paddd T1, \k0 // reuse k0 to avoid a move + paddd \k1, T1 + paddd \k2, T2 + paddd \k3, T3 + + // Multiply 32x32 => 64 and accumulate + pshufd $0x10, \k0, T4 + pshufd $0x32, \k0, \k0 + pshufd $0x10, T1, T5 + pshufd $0x32, T1, T1 + pshufd $0x10, T2, T6 + pshufd $0x32, T2, T2 + pshufd $0x10, T3, T7 + pshufd $0x32, T3, T3 + pmuludq T4, \k0 + pmuludq T5, T1 + pmuludq T6, T2 + pmuludq T7, T3 + paddq \k0, PASS0_SUMS + paddq T1, PASS1_SUMS + paddq T2, PASS2_SUMS + paddq T3, PASS3_SUMS +.endm + +/* + * void nh_sse2(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +SYM_FUNC_START(nh_sse2) + + movdqu 0x00(KEY), K0 + movdqu 0x10(KEY), K1 + movdqu 0x20(KEY), K2 + add $0x30, KEY + pxor PASS0_SUMS, PASS0_SUMS + pxor PASS1_SUMS, PASS1_SUMS + pxor PASS2_SUMS, PASS2_SUMS + pxor PASS3_SUMS, PASS3_SUMS + + sub $0x40, MESSAGE_LEN + jl .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3, 0x00 + _nh_stride K1, K2, K3, K0, 0x10 + _nh_stride K2, K3, K0, K1, 0x20 + _nh_stride K3, K0, K1, K2, 0x30 + add $0x40, KEY + add $0x40, MESSAGE + sub $0x40, MESSAGE_LEN + jge .Lloop4 + +.Lloop4_done: + and $0x3f, MESSAGE_LEN + jz .Ldone + _nh_stride K0, K1, K2, K3, 0x00 + + sub $0x10, MESSAGE_LEN + jz .Ldone + _nh_stride K1, K2, K3, K0, 0x10 + + sub $0x10, MESSAGE_LEN + jz .Ldone + _nh_stride K2, K3, K0, K1, 0x20 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + movdqa PASS0_SUMS, T0 + movdqa PASS2_SUMS, T1 + punpcklqdq PASS1_SUMS, T0 // => (PASS0_SUM_A PASS1_SUM_A) + punpcklqdq PASS3_SUMS, T1 // => (PASS2_SUM_A PASS3_SUM_A) + punpckhqdq PASS1_SUMS, PASS0_SUMS // => (PASS0_SUM_B PASS1_SUM_B) + punpckhqdq PASS3_SUMS, PASS2_SUMS // => (PASS2_SUM_B PASS3_SUM_B) + paddq PASS0_SUMS, T0 + paddq PASS2_SUMS, T1 + movdqu T0, 0x00(HASH) + movdqu T1, 0x10(HASH) + RET +SYM_FUNC_END(nh_sse2) diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c new file mode 100644 index 000000000..8ea5ab0f1 --- /dev/null +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (AVX2 accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_avx2(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_avx2_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !crypto_simd_usable()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, SZ_4K); + + kernel_fpu_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); + kernel_fpu_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-avx2", + .base.cra_priority = 300, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_avx2_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (AVX2-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-avx2"); diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c new file mode 100644 index 000000000..2b353d42e --- /dev/null +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (SSE2 accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_sse2(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_sse2_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !crypto_simd_usable()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, SZ_4K); + + kernel_fpu_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); + kernel_fpu_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-sse2", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_sse2_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_XMM2)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-sse2"); diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl new file mode 100644 index 000000000..2077ce7a5 --- /dev/null +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -0,0 +1,4249 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Copyright (C) 2017-2018 Samuel Neves . All Rights Reserved. +# Copyright (C) 2017-2019 Jason A. Donenfeld . All Rights Reserved. +# Copyright (C) 2006-2017 CRYPTOGAMS by . All Rights Reserved. +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements Poly1305 hash for x86_64. +# +# March 2015 +# +# Initial release. +# +# December 2016 +# +# Add AVX512F+VL+BW code path. +# +# November 2017 +# +# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be +# executed even on Knights Landing. Trigger for modification was +# observation that AVX512 code paths can negatively affect overall +# Skylake-X system performance. Since we are likely to suppress +# AVX512F capability flag [at least on Skylake-X], conversion serves +# as kind of "investment protection". Note that next *lake processor, +# Cannonlake, has AVX512IFMA code path to execute... +# +# Numbers are cycles per processed byte with poly1305_blocks alone, +# measured with rdtsc at fixed clock frequency. +# +# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512 +# P4 4.46/+120% - +# Core 2 2.41/+90% - +# Westmere 1.88/+120% - +# Sandy Bridge 1.39/+140% 1.10 +# Haswell 1.14/+175% 1.11 0.65 +# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35] +# Silvermont 2.83/+95% - +# Knights L 3.60/? 1.65 1.10 0.41(***) +# Goldmont 1.70/+180% - +# VIA Nano 1.82/+150% - +# Sledgehammer 1.38/+160% - +# Bulldozer 2.30/+130% 0.97 +# Ryzen 1.15/+200% 1.08 1.18 +# +# (*) improvement coefficients relative to clang are more modest and +# are ~50% on most processors, in both cases we are comparing to +# __int128 code; +# (**) SSE2 implementation was attempted, but among non-AVX processors +# it was faster than integer-only code only on older Intel P4 and +# Core processors, 50-30%, less newer processor is, but slower on +# contemporary ones, for example almost 2x slower on Atom, and as +# former are naturally disappearing, SSE2 is deemed unnecessary; +# (***) strangely enough performance seems to vary from core to core, +# listed result is best case; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); +$kernel=0; $kernel=1 if (!$flavour && !$output); + +if (!$kernel) { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or + die "can't locate x86_64-xlate.pl"; + + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; + *STDOUT=*OUT; + + if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); + } + + if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { + $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); + $avx += 1 if ($1==2.11 && $2>=8); + } + + if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + } + + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); + } +} else { + $avx = 4; # The kernel uses ifdefs for this. +} + +sub declare_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= ".align $align\n"; + $code .= "SYM_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + +sub end_function() { + my ($name) = @_; + if($kernel) { + $code .= "SYM_FUNC_END($name)\n"; + } else { + $code .= ".size $name,.-$name\n"; + } +} + +$code.=<<___ if $kernel; +#include +___ + +if ($avx) { +$code.=<<___ if $kernel; +.section .rodata +___ +$code.=<<___; +.align 64 +.Lconst: +.Lmask24: +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 +.L129: +.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 +.Lmask26: +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 +.Lpermd_avx2: +.long 2,2,2,3,2,0,2,1 +.Lpermd_avx512: +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 + +.L2_44_inp_permd: +.long 0,1,1,2,2,3,7,7 +.L2_44_inp_shift: +.quad 0,12,24,64 +.L2_44_mask: +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff +.L2_44_shift_rgt: +.quad 44,44,42,64 +.L2_44_shift_lft: +.quad 8,8,10,64 + +.align 64 +.Lx_mask44: +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.Lx_mask42: +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +___ +} +$code.=<<___ if (!$kernel); +.asciz "Poly1305 for x86_64, CRYPTOGAMS by " +.align 16 +___ + +my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); +my ($mac,$nonce)=($inp,$len); # *_emit arguments +my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13"); +my ($h0,$h1,$h2)=("%r14","%rbx","%r10"); + +sub poly1305_iteration { +# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 +# output: $h0-$h2 *= $r0-$r1 +$code.=<<___; + mulq $h0 # h0*r1 + mov %rax,$d2 + mov $r0,%rax + mov %rdx,$d3 + + mulq $h0 # h0*r0 + mov %rax,$h0 # future $h0 + mov $r0,%rax + mov %rdx,$d1 + + mulq $h1 # h1*r0 + add %rax,$d2 + mov $s1,%rax + adc %rdx,$d3 + + mulq $h1 # h1*s1 + mov $h2,$h1 # borrow $h1 + add %rax,$h0 + adc %rdx,$d1 + + imulq $s1,$h1 # h2*s1 + add $h1,$d2 + mov $d1,$h1 + adc \$0,$d3 + + imulq $r0,$h2 # h2*r0 + add $d2,$h1 + mov \$-4,%rax # mask value + adc $h2,$d3 + + and $d3,%rax # last reduction step + mov $d3,$h2 + shr \$2,$d3 + and \$3,$h2 + add $d3,%rax + add %rax,$h0 + adc \$0,$h1 + adc \$0,$h2 +___ +} + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^64 +# unsigned __int64 r[2]; # key value base 2^64 + +$code.=<<___; +.text +___ +$code.=<<___ if (!$kernel); +.extern OPENSSL_ia32cap_P + +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 +.globl poly1305_blocks_x86_64 +.hidden poly1305_blocks_x86_64 +.globl poly1305_emit_x86_64 +.hidden poly1305_emit_x86_64 +___ +&declare_function("poly1305_init_x86_64", 32, 3); +$code.=<<___; + xor %eax,%eax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + + test $inp,$inp + je .Lno_key +___ +$code.=<<___ if (!$kernel); + lea poly1305_blocks_x86_64(%rip),%r10 + lea poly1305_emit_x86_64(%rip),%r11 +___ +$code.=<<___ if (!$kernel && $avx); + mov OPENSSL_ia32cap_P+4(%rip),%r9 + lea poly1305_blocks_avx(%rip),%rax + lea poly1305_emit_avx(%rip),%rcx + bt \$`60-32`,%r9 # AVX? + cmovc %rax,%r10 + cmovc %rcx,%r11 +___ +$code.=<<___ if (!$kernel && $avx>1); + lea poly1305_blocks_avx2(%rip),%rax + bt \$`5+32`,%r9 # AVX2? + cmovc %rax,%r10 +___ +$code.=<<___ if (!$kernel && $avx>3); + mov \$`(1<<31|1<<21|1<<16)`,%rax + shr \$32,%r9 + and %rax,%r9 + cmp %rax,%r9 + je .Linit_base2_44 +___ +$code.=<<___; + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + and 8($inp),%rcx + mov %rax,24($ctx) + mov %rcx,32($ctx) +___ +$code.=<<___ if (!$kernel && $flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if (!$kernel && $flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax +.Lno_key: + RET +___ +&end_function("poly1305_init_x86_64"); + +&declare_function("poly1305_blocks_x86_64", 32, 4); +$code.=<<___; +.cfi_startproc +.Lblocks: + shr \$4,$len + jz .Lno_data # too short + + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + push $ctx +.cfi_push $ctx +.Lblocks_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2 + + mov $s1,$r1 + shr \$2,$s1 + mov $r1,%rax + add $r1,$s1 # s1 = r1 + (r1 >> 2) + jmp .Loop + +.align 32 +.Loop: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 +___ + + &poly1305_iteration(); + +$code.=<<___; + mov $r1,%rax + dec %r15 # len-=16 + jnz .Loop + + mov 0(%rsp),$ctx +.cfi_restore $ctx + + mov $h0,0($ctx) # store hash value + mov $h1,8($ctx) + mov $h2,16($ctx) + + mov 8(%rsp),%r15 +.cfi_restore %r15 + mov 16(%rsp),%r14 +.cfi_restore %r14 + mov 24(%rsp),%r13 +.cfi_restore %r13 + mov 32(%rsp),%r12 +.cfi_restore %r12 + mov 40(%rsp),%rbx +.cfi_restore %rbx + lea 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data: +.Lblocks_epilogue: + RET +.cfi_endproc +___ +&end_function("poly1305_blocks_x86_64"); + +&declare_function("poly1305_emit_x86_64", 32, 3); +$code.=<<___; +.Lemit: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + RET +___ +&end_function("poly1305_emit_x86_64"); +if ($avx) { + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int32 h[5]; # current hash value base 2^26 +# unsigned __int32 is_base2_26; +# unsigned __int64 r[2]; # key value base 2^64 +# unsigned __int64 pad; +# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9]; +# +# where r^n are base 2^26 digits of degrees of multiplier key. There are +# 5 digits, but last four are interleaved with multiples of 5, totalling +# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4. + +my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) = + map("%xmm$_",(0..15)); + +$code.=<<___; +.type __poly1305_block,\@abi-omnipotent +.align 32 +__poly1305_block: + push $ctx +___ + &poly1305_iteration(); +$code.=<<___; + pop $ctx + RET +.size __poly1305_block,.-__poly1305_block + +.type __poly1305_init_avx,\@abi-omnipotent +.align 32 +__poly1305_init_avx: + push %rbp + mov %rsp,%rbp + mov $r0,$h0 + mov $r1,$h1 + xor $h2,$h2 + + lea 48+64($ctx),$ctx # size optimization + + mov $r1,%rax + call __poly1305_block # r^2 + + mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26 + mov \$0x3ffffff,%edx + mov $h0,$d1 + and $h0#d,%eax + mov $r0,$d2 + and $r0#d,%edx + mov %eax,`16*0+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*0+4-64`($ctx) + shr \$26,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*1+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*1+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*2+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*2+4-64`($ctx) + shr \$26,$d2 + + mov $h1,%rax + mov $r1,%rdx + shl \$12,%rax + shl \$12,%rdx + or $d1,%rax + or $d2,%rdx + and \$0x3ffffff,%eax + and \$0x3ffffff,%edx + mov %eax,`16*3+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*3+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*4+0-64`($ctx) + mov $h1,$d1 + mov %edx,`16*4+4-64`($ctx) + mov $r1,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + shr \$14,$d1 + shr \$14,$d2 + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*5+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*5+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*6+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*6+4-64`($ctx) + shr \$26,$d2 + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+0-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d2#d,`16*7+4-64`($ctx) + lea ($d2,$d2,4),$d2 # *5 + mov $d1#d,`16*8+0-64`($ctx) + mov $d2#d,`16*8+4-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^3 + + mov \$0x3ffffff,%eax # save r^3 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+12-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+12-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+12-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+12-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+12-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+12-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+12-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^4 + + mov \$0x3ffffff,%eax # save r^4 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+8-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+8-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+8-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+8-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+8-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+8-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+8-64`($ctx) + + lea -48-64($ctx),$ctx # size [de-]optimization + pop %rbp + RET +.size __poly1305_init_avx,.-__poly1305_init_avx +___ + +&declare_function("poly1305_blocks_avx", 32, 4); +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx: + and \$-16,$len + jz .Lno_data_avx + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx + + test \$31,$len + jz .Leven_avx + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx_body: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + + call __poly1305_block + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + sub \$16,%r15 + jz .Lstore_base2_26_avx + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + jmp .Lproceed_avx + +.align 32 +.Lstore_base2_64_avx: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx + +.align 16 +.Lstore_base2_26_avx: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx: +.Lblocks_avx_epilogue: + RET +.cfi_endproc + +.align 32 +.Lbase2_64_avx: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$31,$len + jz .Linit_avx + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + +.Linit_avx: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx: + mov %r15,$len + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx_epilogue: + jmp .Ldo_avx +.cfi_endproc + +.align 32 +.Leven_avx: +.cfi_startproc + vmovd 4*0($ctx),$H0 # load hash value + vmovd 4*1($ctx),$H1 + vmovd 4*2($ctx),$H2 + vmovd 4*3($ctx),$H3 + vmovd 4*4($ctx),$H4 + +.Ldo_avx: +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + and \$-32,%rsp + sub \$-8,%rsp + lea -0x58(%rsp),%r11 + sub \$0x178,%rsp +___ +$code.=<<___ if ($win64); + lea -0xf8(%rsp),%r11 + sub \$0x218,%rsp + vmovdqa %xmm6,0x50(%r11) + vmovdqa %xmm7,0x60(%r11) + vmovdqa %xmm8,0x70(%r11) + vmovdqa %xmm9,0x80(%r11) + vmovdqa %xmm10,0x90(%r11) + vmovdqa %xmm11,0xa0(%r11) + vmovdqa %xmm12,0xb0(%r11) + vmovdqa %xmm13,0xc0(%r11) + vmovdqa %xmm14,0xd0(%r11) + vmovdqa %xmm15,0xe0(%r11) +.Ldo_avx_body: +___ +$code.=<<___; + sub \$64,$len + lea -32($inp),%rax + cmovc %rax,$inp + + vmovdqu `16*3`($ctx),$D4 # preload r0^2 + lea `16*3+64`($ctx),$ctx # size optimization + lea .Lconst(%rip),%rcx + + ################################################################ + # load input + vmovdqu 16*2($inp),$T0 + vmovdqu 16*3($inp),$T1 + vmovdqa 64(%rcx),$MASK # .Lmask26 + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + vpsrlq \$40,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + jbe .Lskip_loop_avx + + # expand and copy pre-calculated table to stack + vmovdqu `16*1-64`($ctx),$D1 + vmovdqu `16*2-64`($ctx),$D2 + vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434 + vpshufd \$0x44,$D4,$D0 # xx12 -> 1212 + vmovdqa $D3,-0x90(%r11) + vmovdqa $D0,0x00(%rsp) + vpshufd \$0xEE,$D1,$D4 + vmovdqu `16*3-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x80(%r11) + vmovdqa $D1,0x10(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqu `16*4-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x70(%r11) + vmovdqa $D2,0x20(%rsp) + vpshufd \$0xEE,$D0,$D4 + vmovdqu `16*5-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D4,-0x60(%r11) + vmovdqa $D0,0x30(%rsp) + vpshufd \$0xEE,$D1,$D3 + vmovdqu `16*6-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D3,-0x50(%r11) + vmovdqa $D1,0x40(%rsp) + vpshufd \$0xEE,$D2,$D4 + vmovdqu `16*7-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D4,-0x40(%r11) + vmovdqa $D2,0x50(%rsp) + vpshufd \$0xEE,$D0,$D3 + vmovdqu `16*8-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D3,-0x30(%r11) + vmovdqa $D0,0x60(%rsp) + vpshufd \$0xEE,$D1,$D4 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x20(%r11) + vmovdqa $D1,0x70(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x10(%r11) + vmovdqa $D2,0x80(%rsp) + + jmp .Loop_avx + +.align 32 +.Loop_avx: + ################################################################ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + # \___________________/ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + # \___________________/ \____________________/ + # + # Note that we start with inp[2:3]*r^2. This is because it + # doesn't depend on reduction in previous iteration. + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # though note that $Tx and $Hx are "reversed" in this section, + # and $D4 is preloaded with r0^2... + + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vmovdqa $H2,0x20(%r11) # offload hash + vpmuludq $T2,$D4,$D2 # d3 = h2*r0 + vmovdqa 0x10(%rsp),$H2 # r1^2 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vmovdqa $H0,0x00(%r11) # + vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1 + vmovdqa $H1,0x10(%r11) # + vpmuludq $T3,$H2,$H1 # h3*r1 + vpaddq $H0,$D0,$D0 # d0 += h4*s1 + vpaddq $H1,$D4,$D4 # d4 += h3*r1 + vmovdqa $H3,0x30(%r11) # + vpmuludq $T2,$H2,$H0 # h2*r1 + vpmuludq $T1,$H2,$H1 # h1*r1 + vpaddq $H0,$D3,$D3 # d3 += h2*r1 + vmovdqa 0x30(%rsp),$H3 # r2^2 + vpaddq $H1,$D2,$D2 # d2 += h1*r1 + vmovdqa $H4,0x40(%r11) # + vpmuludq $T0,$H2,$H2 # h0*r1 + vpmuludq $T2,$H3,$H0 # h2*r2 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + + vmovdqa 0x40(%rsp),$H4 # s2^2 + vpaddq $H0,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H3,$H1 # h1*r2 + vpmuludq $T0,$H3,$H3 # h0*r2 + vpaddq $H1,$D3,$D3 # d3 += h1*r2 + vmovdqa 0x50(%rsp),$H2 # r3^2 + vpaddq $H3,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H4,$H0 # h4*s2 + vpmuludq $T3,$H4,$H4 # h3*s2 + vpaddq $H0,$D1,$D1 # d1 += h4*s2 + vmovdqa 0x60(%rsp),$H3 # s3^2 + vpaddq $H4,$D0,$D0 # d0 += h3*s2 + + vmovdqa 0x80(%rsp),$H4 # s4^2 + vpmuludq $T1,$H2,$H1 # h1*r3 + vpmuludq $T0,$H2,$H2 # h0*r3 + vpaddq $H1,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $T4,$H3,$H0 # h4*s3 + vpmuludq $T3,$H3,$H1 # h3*s3 + vpaddq $H0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*0($inp),$H0 # load input + vpaddq $H1,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H3,$H3 # h2*s3 + vpmuludq $T2,$H4,$T2 # h2*s4 + vpaddq $H3,$D0,$D0 # d0 += h2*s3 + + vmovdqu 16*1($inp),$H1 # + vpaddq $T2,$D1,$D1 # d1 += h2*s4 + vpmuludq $T3,$H4,$T3 # h3*s4 + vpmuludq $T4,$H4,$T4 # h4*s4 + vpsrldq \$6,$H0,$H2 # splat input + vpaddq $T3,$D2,$D2 # d2 += h3*s4 + vpaddq $T4,$D3,$D3 # d3 += h4*s4 + vpsrldq \$6,$H1,$H3 # + vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4 + vpmuludq $T1,$H4,$T0 # h1*s4 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpaddq $T4,$D4,$D4 # d4 += h0*r4 + vmovdqa -0x90(%r11),$T4 # r0^4 + vpaddq $T0,$D0,$D0 # d0 += h1*s4 + + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + #vpsrlq \$40,$H4,$H4 # 4 + vpsrldq \$`40/8`,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpand 0(%rcx),$H4,$H4 # .Lmask24 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpaddq 0x00(%r11),$H0,$H0 # add hash value + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + lea 16*2($inp),%rax + lea 16*4($inp),$inp + sub \$64,$len + cmovc %rax,$inp + + ################################################################ + # Now we accumulate (inp[0:1]+hash)*r^4 + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vmovdqa -0x80(%r11),$T2 # r1^4 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T0,$D2,$D2 + vpaddq $T1,$D3,$D3 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpmuludq -0x70(%r11),$H4,$T0 # h4*s1 + vpaddq $T4,$D4,$D4 + + vpaddq $T0,$D0,$D0 # d0 += h4*s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vmovdqa -0x60(%r11),$T3 # r2^4 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpmuludq $H1,$T2,$T1 # h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T1,$D2,$D2 # d2 += h1*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + + vmovdqa -0x50(%r11),$T4 # s2^4 + vpmuludq $H2,$T3,$T0 # h2*r2 + vpmuludq $H1,$T3,$T1 # h1*r2 + vpaddq $T0,$D4,$D4 # d4 += h2*r2 + vpaddq $T1,$D3,$D3 # d3 += h1*r2 + vmovdqa -0x40(%r11),$T2 # r3^4 + vpmuludq $H0,$T3,$T3 # h0*r2 + vpmuludq $H4,$T4,$T0 # h4*s2 + vpaddq $T3,$D2,$D2 # d2 += h0*r2 + vpaddq $T0,$D1,$D1 # d1 += h4*s2 + vmovdqa -0x30(%r11),$T3 # s3^4 + vpmuludq $H3,$T4,$T4 # h3*s2 + vpmuludq $H1,$T2,$T1 # h1*r3 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + + vmovdqa -0x10(%r11),$T4 # s4^4 + vpaddq $T1,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T2,$T2 # h0*r3 + vpmuludq $H4,$T3,$T0 # h4*s3 + vpaddq $T2,$D3,$D3 # d3 += h0*r3 + vpaddq $T0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*2($inp),$T0 # load input + vpmuludq $H3,$T3,$T2 # h3*s3 + vpmuludq $H2,$T3,$T3 # h2*s3 + vpaddq $T2,$D1,$D1 # d1 += h3*s3 + vmovdqu 16*3($inp),$T1 # + vpaddq $T3,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H2,$T4,$H2 # h2*s4 + vpmuludq $H3,$T4,$H3 # h3*s4 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $H2,$D1,$D1 # d1 += h2*s4 + vpmuludq $H4,$T4,$H4 # h4*s4 + vpsrldq \$6,$T1,$T3 # + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 + vpmuludq -0x20(%r11),$H0,$H4 # h0*r4 + vpmuludq $H1,$T4,$H0 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + #vpsrlq \$40,$T4,$T4 # 4 + vpsrldq \$`40/8`,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpand 0(%rcx),$T4,$T4 # .Lmask24 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + ################################################################ + # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + # and P. Schwabe + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D0 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D0,$H0,$H0 + vpsllq \$2,$D0,$D0 + vpaddq $D0,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + ja .Loop_avx + +.Lskip_loop_avx: + ################################################################ + # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2 + add \$32,$len + jnz .Long_tail_avx + + vpaddq $H2,$T2,$T2 + vpaddq $H0,$T0,$T0 + vpaddq $H1,$T1,$T1 + vpaddq $H3,$T3,$T3 + vpaddq $H4,$T4,$T4 + +.Long_tail_avx: + vmovdqa $H2,0x20(%r11) + vmovdqa $H0,0x00(%r11) + vmovdqa $H1,0x10(%r11) + vmovdqa $H3,0x30(%r11) + vmovdqa $H4,0x40(%r11) + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $T2,$D4,$D2 # d2 = h2*r0 + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vpmuludq $T3,$H2,$H0 # h3*r1 + vpaddq $H0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n + vpmuludq $T2,$H2,$H1 # h2*r1 + vpaddq $H1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n + vpmuludq $T1,$H2,$H0 # h1*r1 + vpaddq $H0,$D2,$D2 # d2 += h1*r1 + vpmuludq $T0,$H2,$H2 # h0*r1 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + vpmuludq $T4,$H3,$H3 # h4*s1 + vpaddq $H3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n + vpmuludq $T2,$H4,$H1 # h2*r2 + vpaddq $H1,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H4,$H0 # h1*r2 + vpaddq $H0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n + vpmuludq $T0,$H4,$H4 # h0*r2 + vpaddq $H4,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H2,$H1 # h4*s2 + vpaddq $H1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n + vpmuludq $T3,$H2,$H2 # h3*s2 + vpaddq $H2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $T1,$H3,$H0 # h1*r3 + vpaddq $H0,$D4,$D4 # d4 += h1*r3 + vpmuludq $T0,$H3,$H3 # h0*r3 + vpaddq $H3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n + vpmuludq $T4,$H4,$H1 # h4*s3 + vpaddq $H1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n + vpmuludq $T3,$H4,$H0 # h3*s3 + vpaddq $H0,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H4,$H4 # h2*s3 + vpaddq $H4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $T0,$H2,$H2 # h0*r4 + vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4 + vpmuludq $T4,$H3,$H1 # h4*s4 + vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4 + vpmuludq $T3,$H3,$H0 # h3*s4 + vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4 + vpmuludq $T2,$H3,$H1 # h2*s4 + vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4 + vpmuludq $T1,$H3,$H3 # h1*s4 + vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4 + + jz .Lshort_tail_avx + + vmovdqu 16*0($inp),$H0 # load input + vmovdqu 16*1($inp),$H1 + + vpsrldq \$6,$H0,$H2 # splat input + vpsrldq \$6,$H1,$H3 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + vpsrlq \$40,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4 + vpaddq 0x00(%r11),$H0,$H0 + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + ################################################################ + # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpaddq $T0,$D0,$D0 # d0 += h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T1,$D1,$D1 # d1 += h1*r0 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpaddq $T0,$D2,$D2 # d2 += h2*r0 + vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T1,$D3,$D3 # d3 += h3*r0 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpaddq $T4,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2 + vpmuludq $H1,$T2,$T0 # h1*r1 + vpaddq $T0,$D2,$D2 # d2 += h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + vpmuludq $H4,$T3,$T3 # h4*s1 + vpaddq $T3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2 + vpmuludq $H2,$T4,$T1 # h2*r2 + vpaddq $T1,$D4,$D4 # d4 += h2*r2 + vpmuludq $H1,$T4,$T0 # h1*r2 + vpaddq $T0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3 + vpmuludq $H0,$T4,$T4 # h0*r2 + vpaddq $T4,$D2,$D2 # d2 += h0*r2 + vpmuludq $H4,$T2,$T1 # h4*s2 + vpaddq $T1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3 + vpmuludq $H3,$T2,$T2 # h3*s2 + vpaddq $T2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $H1,$T3,$T0 # h1*r3 + vpaddq $T0,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T3,$T3 # h0*r3 + vpaddq $T3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4 + vpmuludq $H4,$T4,$T1 # h4*s3 + vpaddq $T1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4 + vpmuludq $H3,$T4,$T0 # h3*s3 + vpaddq $T0,$D1,$D1 # d1 += h3*s3 + vpmuludq $H2,$T4,$T4 # h2*s3 + vpaddq $T4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H0,$T2,$T2 # h0*r4 + vpaddq $T2,$D4,$D4 # d4 += h0*r4 + vpmuludq $H4,$T3,$T1 # h4*s4 + vpaddq $T1,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$T3,$T0 # h3*s4 + vpaddq $T0,$D2,$D2 # d2 += h3*s4 + vpmuludq $H2,$T3,$T1 # h2*s4 + vpaddq $T1,$D1,$D1 # d1 += h2*s4 + vpmuludq $H1,$T3,$T3 # h1*s4 + vpaddq $T3,$D0,$D0 # d0 += h1*s4 + +.Lshort_tail_avx: + ################################################################ + # horizontal addition + + vpsrldq \$8,$D4,$T4 + vpsrldq \$8,$D3,$T3 + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$D0,$T0 + vpsrldq \$8,$D2,$T2 + vpaddq $T3,$D3,$D3 + vpaddq $T4,$D4,$D4 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$D2,$D2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D4,$H4 + vpand $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$H1 + vpand $MASK,$D1,$D1 + vpaddq $H1,$D2,$D2 # h1 -> h2 + + vpaddq $H4,$D0,$D0 + vpsllq \$2,$H4,$H4 + vpaddq $H4,$D0,$D0 # h4 -> h0 + + vpsrlq \$26,$D2,$H2 + vpand $MASK,$D2,$D2 + vpaddq $H2,$D3,$D3 # h2 -> h3 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vmovd $D0,`4*0-48-64`($ctx) # save partially reduced + vmovd $D1,`4*1-48-64`($ctx) + vmovd $D2,`4*2-48-64`($ctx) + vmovd $D3,`4*3-48-64`($ctx) + vmovd $D4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa 0x50(%r11),%xmm6 + vmovdqa 0x60(%r11),%xmm7 + vmovdqa 0x70(%r11),%xmm8 + vmovdqa 0x80(%r11),%xmm9 + vmovdqa 0x90(%r11),%xmm10 + vmovdqa 0xa0(%r11),%xmm11 + vmovdqa 0xb0(%r11),%xmm12 + vmovdqa 0xc0(%r11),%xmm13 + vmovdqa 0xd0(%r11),%xmm14 + vmovdqa 0xe0(%r11),%xmm15 + lea 0xf8(%r11),%rsp +.Ldo_avx_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + RET +.cfi_endproc +___ +&end_function("poly1305_blocks_avx"); + +&declare_function("poly1305_emit_avx", 32, 3); +$code.=<<___; + cmpl \$0,20($ctx) # is_base2_26? + je .Lemit + + mov 0($ctx),%eax # load hash value base 2^26 + mov 4($ctx),%ecx + mov 8($ctx),%r8d + mov 12($ctx),%r11d + mov 16($ctx),%r10d + + shl \$26,%rcx # base 2^26 -> base 2^64 + mov %r8,%r9 + shl \$52,%r8 + add %rcx,%rax + shr \$12,%r9 + add %rax,%r8 # h0 + adc \$0,%r9 + + shl \$14,%r11 + mov %r10,%rax + shr \$24,%r10 + add %r11,%r9 + shl \$40,%rax + add %rax,%r9 # h1 + adc \$0,%r10 # h2 + + mov %r10,%rax # could be partially reduced, so reduce + mov %r10,%rcx + and \$3,%r10 + shr \$2,%rax + and \$-4,%rcx + add %rcx,%rax + add %rax,%r8 + adc \$0,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + RET +___ +&end_function("poly1305_emit_avx"); + +if ($avx>1) { + +my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = + map("%ymm$_",(0..15)); +my $S4=$MASK; + +sub poly1305_blocks_avxN { + my ($avx512) = @_; + my $suffix = $avx512 ? "_avx512" : ""; +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx2$suffix + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx2$suffix: + and \$-16,$len + jz .Lno_data_avx2$suffix + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx2$suffix + + test \$63,$len + jz .Leven_avx2$suffix + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + +.Lbase2_26_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_26_pre_avx2$suffix + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + test %r15,%r15 + jz .Lstore_base2_26_avx2$suffix + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + jmp .Lproceed_avx2$suffix + +.align 32 +.Lstore_base2_64_avx2$suffix: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx2$suffix + +.align 16 +.Lstore_base2_26_avx2$suffix: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx2$suffix: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx2$suffix: +.Lblocks_avx2_epilogue$suffix: + RET +.cfi_endproc + +.align 32 +.Lbase2_64_avx2$suffix: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$63,$len + jz .Linit_avx2$suffix + +.Lbase2_64_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_64_pre_avx2$suffix + +.Linit_avx2$suffix: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx2$suffix: + mov %r15,$len # restore $len +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d + mov \$`(1<<31|1<<30|1<<16)`,%r11d +___ +$code.=<<___; + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx2_epilogue$suffix: + jmp .Ldo_avx2$suffix +.cfi_endproc + +.align 32 +.Leven_avx2$suffix: +.cfi_startproc +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d +___ +$code.=<<___; + vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 + vmovd 4*1($ctx),%x#$H1 + vmovd 4*2($ctx),%x#$H2 + vmovd 4*3($ctx),%x#$H3 + vmovd 4*4($ctx),%x#$H4 + +.Ldo_avx2$suffix: +___ +$code.=<<___ if (!$kernel && $avx>2); + cmp \$512,$len + jb .Lskip_avx512 + and %r11d,%r9d + test \$`1<<16`,%r9d # check for AVX512F + jnz .Lblocks_avx512 +.Lskip_avx512$suffix: +___ +$code.=<<___ if ($avx > 2 && $avx512 && $kernel); + cmp \$512,$len + jae .Lblocks_avx512 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx2_body$suffix: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),$T0 # .Lpermd_avx2 + + # expand and copy pre-calculated table to stack + vmovdqu `16*0-64`($ctx),%x#$T2 + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$T3 + vmovdqu `16*2-64`($ctx),%x#$T4 + vmovdqu `16*3-64`($ctx),%x#$D0 + vmovdqu `16*4-64`($ctx),%x#$D1 + vmovdqu `16*5-64`($ctx),%x#$D2 + lea 0x90(%rsp),%rax # size optimization + vmovdqu `16*6-64`($ctx),%x#$D3 + vpermd $T2,$T0,$T2 # 00003412 -> 14243444 + vmovdqu `16*7-64`($ctx),%x#$D4 + vpermd $T3,$T0,$T3 + vmovdqu `16*8-64`($ctx),%x#$MASK + vpermd $T4,$T0,$T4 + vmovdqa $T2,0x00(%rsp) + vpermd $D0,$T0,$D0 + vmovdqa $T3,0x20-0x90(%rax) + vpermd $D1,$T0,$D1 + vmovdqa $T4,0x40-0x90(%rax) + vpermd $D2,$T0,$D2 + vmovdqa $D0,0x60-0x90(%rax) + vpermd $D3,$T0,$D3 + vmovdqa $D1,0x80-0x90(%rax) + vpermd $D4,$T0,$D4 + vmovdqa $D2,0xa0-0x90(%rax) + vpermd $MASK,$T0,$MASK + vmovdqa $D3,0xc0-0x90(%rax) + vmovdqa $D4,0xe0-0x90(%rax) + vmovdqa $MASK,0x100-0x90(%rax) + vmovdqa 64(%rcx),$MASK # .Lmask26 + + ################################################################ + # load input + vmovdqu 16*0($inp),%x#$T0 + vmovdqu 16*1($inp),%x#$T1 + vinserti128 \$1,16*2($inp),$T0,$T0 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$64,$len + jz .Ltail_avx2$suffix + jmp .Loop_avx2$suffix + +.align 32 +.Loop_avx2$suffix: + ################################################################ + # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 + # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 + # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2 + # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1 + # \________/\__________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqa `32*0`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqa `32*1`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqa `32*3`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqa `32*6-0x90`(%rax),$T3 # s3^4 + vmovdqa `32*8-0x90`(%rax),$S4 # s4^4 + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4 + # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + vmovdqa `32*4-0x90`(%rax),$T1 # s2 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vmovdqu 16*0($inp),%x#$T0 # load input + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + vinserti128 \$1,16*2($inp),$T0,$T0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vmovdqu 16*1($inp),%x#$T1 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqa `32*5-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpsrldq \$6,$T1,$T3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + vpunpckhqdq $T1,$T0,$T4 # 4 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # lazy reduction (interleaved with tail of input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$4,$T3,$T2 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpand $MASK,$T2,$T2 # 2 + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$30,$T3,$T3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + sub \$64,$len + jnz .Loop_avx2$suffix + + .byte 0x66,0x90 +.Ltail_avx2$suffix: + ################################################################ + # while above multiplications were by r^4 in all lanes, in last + # iteration we multiply least significant lane by r^4 and most + # significant one by r, so copy of above except that references + # to the precomputed table are displaced by 4... + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqu `32*0+4`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqu `32*1+4`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqu `32*3+4`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4 + vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1 + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vmovdqu `32*4+4-0x90`(%rax),$T1 # s2 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqu `32*5+4-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # horizontal addition + + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$H2,$T2 + vpsrldq \$8,$H3,$T3 + vpsrldq \$8,$H4,$T4 + vpsrldq \$8,$H0,$T0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + + vpermq \$0x2,$H3,$T3 + vpermq \$0x2,$H4,$T4 + vpermq \$0x2,$H0,$T0 + vpermq \$0x2,$D1,$T1 + vpermq \$0x2,$H2,$T2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa -0xb0(%r10),%xmm6 + vmovdqa -0xa0(%r10),%xmm7 + vmovdqa -0x90(%r10),%xmm8 + vmovdqa -0x80(%r10),%xmm9 + vmovdqa -0x70(%r10),%xmm10 + vmovdqa -0x60(%r10),%xmm11 + vmovdqa -0x50(%r10),%xmm12 + vmovdqa -0x40(%r10),%xmm13 + vmovdqa -0x30(%r10),%xmm14 + vmovdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx2_epilogue$suffix: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + RET +.cfi_endproc +___ +if($avx > 2 && $avx512) { +my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); +my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); +my $PADBIT="%zmm30"; + +map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain +map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4)); +map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); +map(s/%y/%z/,($MASK)); + +$code.=<<___; +.cfi_startproc +.Lblocks_avx512: + mov \$15,%eax + kmovw %eax,%k2 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx512_body: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2 + + # expand pre-calculated table + vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0} + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1} + mov \$0x20,%rax + vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1} + vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2} + vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2} + vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3} + vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3} + vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4} + vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4} + vpermd $D0,$T2,$R0 # 00003412 -> 14243444 + vpbroadcastq 64(%rcx),$MASK # .Lmask26 + vpermd $D1,$T2,$R1 + vpermd $T0,$T2,$S1 + vpermd $D2,$T2,$R2 + vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0 + vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304 + vpermd $T1,$T2,$S2 + vmovdqu64 $R1,0x00(%rsp,%rax){%k2} + vpsrlq \$32,$R1,$T1 + vpermd $D3,$T2,$R3 + vmovdqa64 $S1,0x40(%rsp){%k2} + vpermd $T3,$T2,$S3 + vpermd $D4,$T2,$R4 + vmovdqu64 $R2,0x40(%rsp,%rax){%k2} + vpermd $T4,$T2,$S4 + vmovdqa64 $S2,0x80(%rsp){%k2} + vmovdqu64 $R3,0x80(%rsp,%rax){%k2} + vmovdqa64 $S3,0xc0(%rsp){%k2} + vmovdqu64 $R4,0xc0(%rsp,%rax){%k2} + vmovdqa64 $S4,0x100(%rsp){%k2} + + ################################################################ + # calculate 5th through 8th powers of the key + # + # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1 + # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2 + # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3 + # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4 + # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0 + + vpmuludq $T0,$R0,$D0 # d0 = r0'*r0 + vpmuludq $T0,$R1,$D1 # d1 = r0'*r1 + vpmuludq $T0,$R2,$D2 # d2 = r0'*r2 + vpmuludq $T0,$R3,$D3 # d3 = r0'*r3 + vpmuludq $T0,$R4,$D4 # d4 = r0'*r4 + vpsrlq \$32,$R2,$T2 + + vpmuludq $T1,$S4,$M0 + vpmuludq $T1,$R0,$M1 + vpmuludq $T1,$R1,$M2 + vpmuludq $T1,$R2,$M3 + vpmuludq $T1,$R3,$M4 + vpsrlq \$32,$R3,$T3 + vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4 + vpaddq $M1,$D1,$D1 # d1 += r1'*r0 + vpaddq $M2,$D2,$D2 # d2 += r1'*r1 + vpaddq $M3,$D3,$D3 # d3 += r1'*r2 + vpaddq $M4,$D4,$D4 # d4 += r1'*r3 + + vpmuludq $T2,$S3,$M0 + vpmuludq $T2,$S4,$M1 + vpmuludq $T2,$R1,$M3 + vpmuludq $T2,$R2,$M4 + vpmuludq $T2,$R0,$M2 + vpsrlq \$32,$R4,$T4 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4 + vpaddq $M3,$D3,$D3 # d3 += r2'*r1 + vpaddq $M4,$D4,$D4 # d4 += r2'*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*r0 + + vpmuludq $T3,$S2,$M0 + vpmuludq $T3,$R0,$M3 + vpmuludq $T3,$R1,$M4 + vpmuludq $T3,$S3,$M1 + vpmuludq $T3,$S4,$M2 + vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2 + vpaddq $M3,$D3,$D3 # d3 += r3'*r0 + vpaddq $M4,$D4,$D4 # d4 += r3'*r1 + vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3 + vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4 + + vpmuludq $T4,$S4,$M3 + vpmuludq $T4,$R0,$M4 + vpmuludq $T4,$S1,$M0 + vpmuludq $T4,$S2,$M1 + vpmuludq $T4,$S3,$M2 + vpaddq $M3,$D3,$D3 # d3 += r2'*5*r4 + vpaddq $M4,$D4,$D4 # d4 += r2'*r0 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r1 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*5*r3 + + ################################################################ + # load input + vmovdqu64 16*0($inp),%z#$T3 + vmovdqu64 16*4($inp),%z#$T4 + lea 16*8($inp),$inp + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D4,$M4 + vpandq $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$M1 + vpandq $MASK,$D1,$D1 + vpaddq $M1,$D2,$D2 # d1 -> d2 + + vpaddq $M4,$D0,$D0 + vpsllq \$2,$M4,$M4 + vpaddq $M4,$D0,$D0 # d4 -> d0 + + vpsrlq \$26,$D2,$M2 + vpandq $MASK,$D2,$D2 + vpaddq $M2,$D3,$D3 # d2 -> d3 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + ################################################################ + # at this point we have 14243444 in $R0-$S4 and 05060708 in + # $D0-$D4, ... + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + # ... since input 64-bit lanes are ordered as 73625140, we could + # "vperm" it to 76543210 (here and in each loop iteration), *or* + # we could just flow along, hence the goal for $R0-$S4 is + # 1858286838784888 ... + + vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512: + mov \$0x7777,%eax + kmovw %eax,%k1 + + vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4--- + vpermd $R1,$M0,$R1 + vpermd $R2,$M0,$R2 + vpermd $R3,$M0,$R3 + vpermd $R4,$M0,$R4 + + vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888 + vpermd $D1,$M0,${R1}{%k1} + vpermd $D2,$M0,${R2}{%k1} + vpermd $D3,$M0,${R3}{%k1} + vpermd $D4,$M0,${R4}{%k1} + + vpslld \$2,$R1,$S1 # *5 + vpslld \$2,$R2,$S2 + vpslld \$2,$R3,$S3 + vpslld \$2,$R4,$S4 + vpaddd $R1,$S1,$S1 + vpaddd $R2,$S2,$S2 + vpaddd $R3,$S3,$S3 + vpaddd $R4,$S4,$S4 + + vpbroadcastq 32(%rcx),$PADBIT # .L129 + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + vporq $T3,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$14,$T4,$T3 + vpsrlq \$40,$T4,$T4 # 4 + vpandq $MASK,$T2,$T2 # 2 + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$192,$len + jbe .Ltail_avx512 + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: + ################################################################ + # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8 + # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7 + # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6 + # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5 + # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4 + # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3 + # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2 + # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1 + # \________/\___________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4 + # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0 + # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1 + # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2 + # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpaddq $H0,$T0,$H0 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu64 16*0($inp),$T3 # load input + vmovdqu64 16*4($inp),$T4 + lea 16*8($inp),$inp + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpaddq $M3,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$S4,$M2 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + + vpsrlq \$26,$D3,$H3 + vpandq $MASK,$D3,$D3 + vpaddq $H3,$D4,$H4 # h3 -> h4 + + vporq $T3,$T2,$T2 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpandq $MASK,$T2,$T2 # 2 + + vpsrlq \$26,$H4,$D4 + vpandq $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpandq $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpandq $MASK,$H2,$H2 + vpaddq $D2,$D3,$H3 # h2 -> h3 + + vpsrlq \$14,$T4,$T3 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpandq $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + sub \$128,$len + ja .Loop_avx512 + +.Ltail_avx512: + ################################################################ + # while above multiplications were by r^8 in all lanes, in last + # iteration we multiply least significant lane by r^8 and most + # significant one by r, that's why table gets shifted... + + vpsrlq \$32,$R0,$R0 # 0105020603070408 + vpsrlq \$32,$R1,$R1 + vpsrlq \$32,$R2,$R2 + vpsrlq \$32,$S3,$S3 + vpsrlq \$32,$S4,$S4 + vpsrlq \$32,$R3,$R3 + vpsrlq \$32,$R4,$R4 + vpsrlq \$32,$S1,$S1 + vpsrlq \$32,$S2,$S2 + + ################################################################ + # load either next or last 64 byte of input + lea ($inp,$len),$inp + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu 16*0($inp),%x#$T0 + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vmovdqu 16*1($inp),%x#$T1 + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0 + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1 + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpmuludq $H3,$S4,$M2 + vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # horizontal addition + + mov \$1,%eax + vpermq \$0xb1,$H3,$D3 + vpermq \$0xb1,$D4,$H4 + vpermq \$0xb1,$H0,$D0 + vpermq \$0xb1,$H1,$D1 + vpermq \$0xb1,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + kmovw %eax,%k3 + vpermq \$0x2,$H3,$D3 + vpermq \$0x2,$H4,$D4 + vpermq \$0x2,$H0,$D0 + vpermq \$0x2,$H1,$D1 + vpermq \$0x2,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + vextracti64x4 \$0x1,$H3,%y#$D3 + vextracti64x4 \$0x1,$H4,%y#$D4 + vextracti64x4 \$0x1,$H0,%y#$D0 + vextracti64x4 \$0x1,$H1,%y#$D1 + vextracti64x4 \$0x1,$H2,%y#$D2 + vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case + vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2 + vpaddq $D0,$H0,${H0}{%k3}{z} + vpaddq $D1,$H1,${H1}{%k3}{z} + vpaddq $D2,$H2,${H2}{%k3}{z} +___ +map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT)); +map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK)); +$code.=<<___; + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2 + vpand $MASK,$T1,$T1 # 1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + vpaddq $D3,$H4,$H4 # h3 -> h4 + + lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 + add \$64,$len + jnz .Ltail_avx2$suffix + + vpsubq $T2,$H2,$H2 # undo input accumulation + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) + vzeroall +___ +$code.=<<___ if ($win64); + movdqa -0xb0(%r10),%xmm6 + movdqa -0xa0(%r10),%xmm7 + movdqa -0x90(%r10),%xmm8 + movdqa -0x80(%r10),%xmm9 + movdqa -0x70(%r10),%xmm10 + movdqa -0x60(%r10),%xmm11 + movdqa -0x50(%r10),%xmm12 + movdqa -0x40(%r10),%xmm13 + movdqa -0x30(%r10),%xmm14 + movdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx512_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + RET +.cfi_endproc +___ + +} + +} + +&declare_function("poly1305_blocks_avx2", 32, 4); +poly1305_blocks_avxN(0); +&end_function("poly1305_blocks_avx2"); + +####################################################################### +if ($avx>2) { +# On entry we have input length divisible by 64. But since inner loop +# processes 128 bytes per iteration, cases when length is not divisible +# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this +# reason stack layout is kept identical to poly1305_blocks_avx2. If not +# for this tail, we wouldn't have to even allocate stack frame... + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX512\n"; +} + +&declare_function("poly1305_blocks_avx512", 32, 4); +poly1305_blocks_avxN(1); +&end_function("poly1305_blocks_avx512"); + +if ($kernel) { + $code .= "#endif\n"; +} + +if (!$kernel && $avx>3) { +######################################################################## +# VPMADD52 version using 2^44 radix. +# +# One can argue that base 2^52 would be more natural. Well, even though +# some operations would be more natural, one has to recognize couple of +# things. Base 2^52 doesn't provide advantage over base 2^44 if you look +# at amount of multiply-n-accumulate operations. Secondly, it makes it +# impossible to pre-compute multiples of 5 [referred to as s[]/sN in +# reference implementations], which means that more such operations +# would have to be performed in inner loop, which in turn makes critical +# path longer. In other words, even though base 2^44 reduction might +# look less elegant, overall critical path is actually shorter... + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^44 +# unsigned __int64 s[2]; # key value*20 base 2^44 +# unsigned __int64 r[3]; # key value base 2^44 +# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4]; +# # r^n positions reflect +# # placement in register, not +# # memory, R[3] is R[1]*20 + +$code.=<<___; +.type poly1305_init_base2_44,\@function,3 +.align 32 +poly1305_init_base2_44: + xor %eax,%eax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + +.Linit_base2_44: + lea poly1305_blocks_vpmadd52(%rip),%r10 + lea poly1305_emit_base2_44(%rip),%r11 + + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + mov \$0x00000fffffffffff,%r8 + and 8($inp),%rcx + mov \$0x00000fffffffffff,%r9 + and %rax,%r8 + shrd \$44,%rcx,%rax + mov %r8,40($ctx) # r0 + and %r9,%rax + shr \$24,%rcx + mov %rax,48($ctx) # r1 + lea (%rax,%rax,4),%rax # *5 + mov %rcx,56($ctx) # r2 + shl \$2,%rax # magic <<2 + lea (%rcx,%rcx,4),%rcx # *5 + shl \$2,%rcx # magic <<2 + mov %rax,24($ctx) # s1 + mov %rcx,32($ctx) # s2 + movq \$-1,64($ctx) # write impossible value +___ +$code.=<<___ if ($flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if ($flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax + RET +.size poly1305_init_base2_44,.-poly1305_init_base2_44 +___ +{ +my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17)); +my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21)); +my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52,\@function,4 +.align 32 +poly1305_blocks_vpmadd52: + shr \$4,$len + jz .Lno_data_vpmadd52 # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + # if powers of the key are not calculated yet, process up to 3 + # blocks with this single-block subroutine, otherwise ensure that + # length is divisible by 2 blocks and pass the rest down to next + # subroutine... + + mov \$3,%rax + mov \$1,%r10 + cmp \$4,$len # is input long + cmovae %r10,%rax + test %r8,%r8 # is power value impossible? + cmovns %r10,%rax + + and $len,%rax # is input of favourable length? + jz .Lblocks_vpmadd52_4x + + sub %rax,$len + mov \$7,%r10d + mov \$1,%r11d + kmovw %r10d,%k7 + lea .L2_44_inp_permd(%rip),%r10 + kmovw %r11d,%k1 + + vmovq $padbit,%x#$PAD + vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd + vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift + vpermq \$0xcf,$PAD,$PAD + vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask + + vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value + vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys + vmovdqu64 32($ctx),${r1r0s2}{%k7}{z} + vmovdqu64 24($ctx),${r0s2s1}{%k7}{z} + + vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt + vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft + + jmp .Loop_vpmadd52 + +.align 32 +.Loop_vpmadd52: + vmovdqu32 0($inp),%x#$T0 # load input as ----3210 + lea 16($inp),$inp + + vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110 + vpsrlvq $inp_shift,$T0,$T0 + vpandq $reduc_mask,$T0,$T0 + vporq $PAD,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo # accumulate input + + vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value + vpermq \$0b01010101,$Dlo,${H1}{%k7}{z} + vpermq \$0b10101010,$Dlo,${H2}{%k7}{z} + + vpxord $Dlo,$Dlo,$Dlo + vpxord $Dhi,$Dhi,$Dhi + + vpmadd52luq $r2r1r0,$H0,$Dlo + vpmadd52huq $r2r1r0,$H0,$Dhi + + vpmadd52luq $r1r0s2,$H1,$Dlo + vpmadd52huq $r1r0s2,$H1,$Dhi + + vpmadd52luq $r0s2s1,$H2,$Dlo + vpmadd52huq $r0s2s1,$H2,$Dhi + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword + vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword + vpandq $reduc_mask,$Dlo,$Dlo + + vpaddq $T0,$Dhi,$Dhi + + vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword + + vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-) + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word + vpandq $reduc_mask,$Dlo,$Dlo + + vpermq \$0b10010011,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + vpermq \$0b10010011,$Dlo,${T0}{%k1}{z} + + vpaddq $T0,$Dlo,$Dlo + vpsllq \$2,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + dec %rax # len-=16 + jnz .Loop_vpmadd52 + + vmovdqu64 $Dlo,0($ctx){%k7} # store hash value + + test $len,$len + jnz .Lblocks_vpmadd52_4x + +.Lno_data_vpmadd52: + RET +.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 +___ +} +{ +######################################################################## +# As implied by its name 4x subroutine processes 4 blocks in parallel +# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power +# and is handled in 256-bit %ymm registers. + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_4x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_4x: + shr \$4,$len + jz .Lno_data_vpmadd52_4x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + +.Lblocks_vpmadd52_4x: + vpbroadcastq $padbit,$PAD + + vmovdqa64 .Lx_mask44(%rip),$mask44 + mov \$5,%eax + vmovdqa64 .Lx_mask42(%rip),$mask42 + kmovw %eax,%k1 # used in 2x path + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Lblocks_vpmadd52_2x_do + +.Lblocks_vpmadd52_4x_do: + vpbroadcastq 64($ctx),$R0 # load 4th power of the key + vpbroadcastq 96($ctx),$R1 + vpbroadcastq 128($ctx),$R2 + vpbroadcastq 160($ctx),$S1 + +.Lblocks_vpmadd52_4x_key_loaded: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + test \$7,$len # is len 8*n? + jz .Lblocks_vpmadd52_8x + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 3-1-2-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$4,$len + jz .Ltail_vpmadd52_4x + jmp .Loop_vpmadd52_4x + ud2 + +.align 32 +.Linit_vpmadd52: + vmovq 24($ctx),%x#$S1 # load key + vmovq 56($ctx),%x#$H2 + vmovq 32($ctx),%x#$S2 + vmovq 40($ctx),%x#$R0 + vmovq 48($ctx),%x#$R1 + + vmovdqa $R0,$H0 + vmovdqa $R1,$H1 + vmovdqa $H2,$R2 + + mov \$2,%eax + +.Lmul_init_vpmadd52: + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + dec %eax + jz .Ldone_init_vpmadd52 + + vpunpcklqdq $R1,$H1,$R1 # 1,2 + vpbroadcastq %x#$H1,%x#$H1 # 2,2 + vpunpcklqdq $R2,$H2,$R2 + vpbroadcastq %x#$H2,%x#$H2 + vpunpcklqdq $R0,$H0,$R0 + vpbroadcastq %x#$H0,%x#$H0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R1,$S1,$S1 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S1,$S1 + vpsllq \$2,$S2,$S2 + + jmp .Lmul_init_vpmadd52 + ud2 + +.align 32 +.Ldone_init_vpmadd52: + vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4 + vinserti128 \$1,%x#$R2,$H2,$R2 + vinserti128 \$1,%x#$R0,$H0,$R0 + + vpermq \$0b11011000,$R1,$R1 # 1,3,2,4 + vpermq \$0b11011000,$R2,$R2 + vpermq \$0b11011000,$R0,$R0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpaddq $R1,$S1,$S1 + vpsllq \$2,$S1,$S1 + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Ldone_init_vpmadd52_2x + + vmovdqu64 $R0,64($ctx) # save key powers + vpbroadcastq %x#$R0,$R0 # broadcast 4th power + vmovdqu64 $R1,96($ctx) + vpbroadcastq %x#$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpbroadcastq %x#$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpbroadcastq %x#$S1,$S1 + + jmp .Lblocks_vpmadd52_4x_key_loaded + ud2 + +.align 32 +.Ldone_init_vpmadd52_2x: + vmovdqu64 $R0,64($ctx) # save key powers + vpsrldq \$8,$R0,$R0 # 0-1-0-2 + vmovdqu64 $R1,96($ctx) + vpsrldq \$8,$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpsrldq \$8,$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpsrldq \$8,$S1,$S1 + jmp .Lblocks_vpmadd52_2x_key_loaded + ud2 + +.align 32 +.Lblocks_vpmadd52_2x_do: + vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers + vmovdqu64 160+8($ctx),${S1}{%k1}{z} + vmovdqu64 64+8($ctx),${R0}{%k1}{z} + vmovdqu64 96+8($ctx),${R1}{%k1}{z} + +.Lblocks_vpmadd52_2x_key_loaded: + vmovdqu64 16*0($inp),$T2 # load data + vpxorq $T3,$T3,$T3 + lea 16*2($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as x-1-x-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + jmp .Ltail_vpmadd52_2x + ud2 + +.align 32 +.Loop_vpmadd52_4x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$4,$len # len-=64 + jnz .Loop_vpmadd52_4x + +.Ltail_vpmadd52_4x: + vmovdqu64 128($ctx),$R2 # load all key powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + +.Ltail_vpmadd52_2x: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + # at this point $len is + # either 4*n+2 or 0... + sub \$2,$len # len-=32 + ja .Lblocks_vpmadd52_4x_do + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_4x: + RET +.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x +___ +} +{ +######################################################################## +# As implied by its name 8x subroutine processes 8 blocks in parallel... +# This is intermediate version, as it's used only in cases when input +# length is either 8*n, 8*n+1 or 8*n+2... + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); +my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_8x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_8x: + shr \$4,$len + jz .Lno_data_vpmadd52_8x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + vmovdqa64 .Lx_mask44(%rip),$mask44 + vmovdqa64 .Lx_mask42(%rip),$mask42 + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + +.Lblocks_vpmadd52_8x: + ################################################################ + # fist we calculate more key powers + + vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + vpbroadcastq %x#$R2,$RR2 # broadcast 4th power + vpbroadcastq %x#$R0,$RR0 + vpbroadcastq %x#$R1,$RR1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $RR2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $RR2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $RR2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $RR2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $RR2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $RR2,$R0,$D2hi + + vpmadd52luq $RR0,$R0,$D0lo + vpmadd52huq $RR0,$R0,$D0hi + vpmadd52luq $RR0,$R1,$D1lo + vpmadd52huq $RR0,$R1,$D1hi + vpmadd52luq $RR0,$R2,$D2lo + vpmadd52huq $RR0,$R2,$D2hi + + vpmadd52luq $RR1,$S2,$D0lo + vpmadd52huq $RR1,$S2,$D0hi + vpmadd52luq $RR1,$R0,$D1lo + vpmadd52huq $RR1,$R0,$D1hi + vpmadd52luq $RR1,$R1,$D2lo + vpmadd52huq $RR1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$RR0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$RR1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$RR2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + + vpsrlq \$44,$RR0,$tmp # additional step + vpandq $mask44,$RR0,$RR0 + + vpaddq $tmp,$RR1,$RR1 + + ################################################################ + # At this point Rx holds 1324 powers, RRx - 5768, and the goal + # is 15263748, which reflects how data is loaded... + + vpunpcklqdq $R2,$RR2,$T2 # 3748 + vpunpckhqdq $R2,$RR2,$R2 # 1526 + vpunpcklqdq $R0,$RR0,$T0 + vpunpckhqdq $R0,$RR0,$R0 + vpunpcklqdq $R1,$RR1,$T1 + vpunpckhqdq $R1,$RR1,$R1 +___ +######## switch to %zmm +map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); +map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2); + +$code.=<<___; + vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748 + vshufi64x2 \$0x44,$R0,$T0,$RR0 + vshufi64x2 \$0x44,$R1,$T1,$RR1 + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + + vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4 + vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4 + vpaddq $RR2,$SS2,$SS2 + vpaddq $RR1,$SS1,$SS1 + vpsllq \$2,$SS2,$SS2 + vpsllq \$2,$SS1,$SS1 + + vpbroadcastq $padbit,$PAD + vpbroadcastq %x#$mask44,$mask44 + vpbroadcastq %x#$mask42,$mask42 + + vpbroadcastq %x#$SS1,$S1 # broadcast 8th power + vpbroadcastq %x#$SS2,$S2 + vpbroadcastq %x#$RR0,$R0 + vpbroadcastq %x#$RR1,$R1 + vpbroadcastq %x#$RR2,$R2 + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 73625140 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$8,$len + jz .Ltail_vpmadd52_8x + jmp .Loop_vpmadd52_8x + +.align 32 +.Loop_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$8,$len # len-=128 + jnz .Loop_vpmadd52_8x + +.Ltail_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$SS1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$SS1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$SS2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$SS2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$RR0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$RR0,$D2hi + + vpmadd52luq $H0,$RR0,$D0lo + vpmadd52huq $H0,$RR0,$D0hi + vpmadd52luq $H0,$RR1,$D1lo + vpmadd52huq $H0,$RR1,$D1hi + vpmadd52luq $H0,$RR2,$D2lo + vpmadd52huq $H0,$RR2,$D2hi + + vpmadd52luq $H1,$SS2,$D0lo + vpmadd52huq $H1,$SS2,$D0hi + vpmadd52luq $H1,$RR0,$D1lo + vpmadd52huq $H1,$RR0,$D1hi + vpmadd52luq $H1,$RR1,$D2lo + vpmadd52huq $H1,$RR1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vextracti64x4 \$1,$D0lo,%y#$T0 + vextracti64x4 \$1,$D0hi,%y#$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vextracti64x4 \$1,$D1lo,%y#$T1 + vextracti64x4 \$1,$D1hi,%y#$H1 + vextracti64x4 \$1,$D2lo,%y#$T2 + vextracti64x4 \$1,$D2hi,%y#$H2 +___ +######## switch back to %ymm +map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); + +$code.=<<___; + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + ################################################################ + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_8x: + RET +.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x +___ +} +$code.=<<___; +.type poly1305_emit_base2_44,\@function,3 +.align 32 +poly1305_emit_base2_44: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r9,%rax + shr \$20,%r9 + shl \$44,%rax + mov %r10,%rcx + shr \$40,%r10 + shl \$24,%rcx + + add %rax,%r8 + adc %rcx,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + RET +.size poly1305_emit_base2_44,.-poly1305_emit_base2_44 +___ +} } } +} + +if (!$kernel) +{ # chacha20-poly1305 helpers +my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order +$code.=<<___; +.globl xor128_encrypt_n_pad +.type xor128_encrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_encrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_enc + nop +.Loop_enc_xmm: + movdqu ($inp,$otp),%xmm0 + pxor ($otp),%xmm0 + movdqu %xmm0,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_enc_xmm + + and \$15,%r10 # len % 16 + jz .Ldone_enc + +.Ltail_enc: + mov \$16,$len + sub %r10,$len + xor %eax,%eax +.Loop_enc_byte: + mov ($inp,$otp),%al + xor ($otp),%al + mov %al,($out,$otp) + mov %al,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_enc_byte + + xor %eax,%eax +.Loop_enc_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_enc_pad + +.Ldone_enc: + mov $otp,%rax + RET +.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad + +.globl xor128_decrypt_n_pad +.type xor128_decrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_decrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_dec + nop +.Loop_dec_xmm: + movdqu ($inp,$otp),%xmm0 + movdqa ($otp),%xmm1 + pxor %xmm0,%xmm1 + movdqu %xmm1,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_dec_xmm + + pxor %xmm1,%xmm1 + and \$15,%r10 # len % 16 + jz .Ldone_dec + +.Ltail_dec: + mov \$16,$len + sub %r10,$len + xor %eax,%eax + xor %r11d,%r11d +.Loop_dec_byte: + mov ($inp,$otp),%r11b + mov ($otp),%al + xor %r11b,%al + mov %al,($out,$otp) + mov %r11b,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_dec_byte + + xor %eax,%eax +.Loop_dec_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_dec_pad + +.Ldone_dec: + mov $otp,%rax + RET +.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad +___ +} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lcommon_seh_tail + + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R14 + + jmp .Lcommon_seh_tail +.size se_handler,.-se_handler + +.type avx_handler,\@abi-omnipotent +.align 16 +avx_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 208($context),%rax # pull context->R11 + + lea 0x50(%rax),%rsi + lea 0xf8(%rax),%rax + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + RET +.size avx_handler,.-avx_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 + + .rva .LSEH_begin_poly1305_blocks_x86_64 + .rva .LSEH_end_poly1305_blocks_x86_64 + .rva .LSEH_info_poly1305_blocks_x86_64 + + .rva .LSEH_begin_poly1305_emit_x86_64 + .rva .LSEH_end_poly1305_emit_x86_64 + .rva .LSEH_info_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_poly1305_blocks_avx + .rva .Lbase2_64_avx + .rva .LSEH_info_poly1305_blocks_avx_1 + + .rva .Lbase2_64_avx + .rva .Leven_avx + .rva .LSEH_info_poly1305_blocks_avx_2 + + .rva .Leven_avx + .rva .LSEH_end_poly1305_blocks_avx + .rva .LSEH_info_poly1305_blocks_avx_3 + + .rva .LSEH_begin_poly1305_emit_avx + .rva .LSEH_end_poly1305_emit_avx + .rva .LSEH_info_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_poly1305_blocks_avx2 + .rva .Lbase2_64_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_1 + + .rva .Lbase2_64_avx2 + .rva .Leven_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_2 + + .rva .Leven_avx2 + .rva .LSEH_end_poly1305_blocks_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_3 +___ +$code.=<<___ if ($avx>2); + .rva .LSEH_begin_poly1305_blocks_avx512 + .rva .LSEH_end_poly1305_blocks_avx512 + .rva .LSEH_info_poly1305_blocks_avx512 +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_poly1305_init_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 + +.LSEH_info_poly1305_blocks_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_body,.Lblocks_epilogue + +.LSEH_info_poly1305_emit_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); +.LSEH_info_poly1305_blocks_avx_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_emit_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); +.LSEH_info_poly1305_blocks_avx2_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[] +___ +$code.=<<___ if ($avx>2); +.LSEH_info_poly1305_blocks_avx512: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[] +___ +} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split('\n',$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + s/%r([a-z]+)#d/%e$1/g; + s/%r([0-9]+)#d/%r$1d/g; + s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; + + if ($kernel) { + s/(^\.type.*),[0-9]+$/\1/; + s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/; + next if /^\.cfi.*/; + } + + print $_,"\n"; +} +close STDOUT; diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c new file mode 100644 index 000000000..1dfb8af48 --- /dev/null +++ b/arch/x86/crypto/poly1305_glue.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_init_x86_64(void *ctx, + const u8 key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); + +struct poly1305_arch_internal { + union { + struct { + u32 h[5]; + u32 is_base2_26; + }; + u64 hs[3]; + }; + u64 r[2]; + u64 pad; + struct { u32 r2, r1, r4, r3; } rn[9]; +}; + +/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit + * the unfortunate situation of using AVX and then having to go back to scalar + * -- because the user is silly and has called the update function from two + * separate contexts -- then we need to convert back to the original base before + * proceeding. It is possible to reason that the initial reduction below is + * sufficient given the implementation invariants. However, for an avoidance of + * doubt and because this is not performance critical, we do the full reduction + * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py + */ +static void convert_to_base2_64(void *ctx) +{ + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->hs[2] = state->h[4] >> 24; +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); + state->hs[2] &= 3; + state->hs[0] += cy; + state->hs[1] += (cy = ULT(state->hs[0], cy)); + state->hs[2] += ULT(state->hs[1], cy); +#undef ULT + state->is_base2_26 = 0; +} + +static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]) +{ + poly1305_init_x86_64(ctx, key); +} + +static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, + const u32 padbit) +{ + struct poly1305_arch_internal *state = ctx; + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || + SZ_4K % POLY1305_BLOCK_SIZE); + + if (!static_branch_likely(&poly1305_use_avx) || + (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || + !crypto_simd_usable()) { + convert_to_base2_64(ctx); + poly1305_blocks_x86_64(ctx, inp, len, padbit); + return; + } + + do { + const size_t bytes = min_t(size_t, len, SZ_4K); + + kernel_fpu_begin(); + if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) + poly1305_blocks_avx512(ctx, inp, bytes, padbit); + else if (static_branch_likely(&poly1305_use_avx2)) + poly1305_blocks_avx2(ctx, inp, bytes, padbit); + else + poly1305_blocks_avx(ctx, inp, bytes, padbit); + kernel_fpu_end(); + + len -= bytes; + inp += bytes; + } while (len); +} + +static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + if (!static_branch_likely(&poly1305_use_avx)) + poly1305_emit_x86_64(ctx, mac, nonce); + else + poly1305_emit_avx(ctx, mac, nonce); +} + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) +{ + poly1305_simd_init(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(&key[16]); + dctx->s[1] = get_unaligned_le32(&key[20]); + dctx->s[2] = get_unaligned_le32(&key[24]); + dctx->s[3] = get_unaligned_le32(&key[28]); + dctx->buflen = 0; + dctx->sset = true; +} +EXPORT_SYMBOL(poly1305_init_arch); + +static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, + const u8 *inp, unsigned int len) +{ + unsigned int acc = 0; + if (unlikely(!dctx->sset)) { + if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) { + poly1305_simd_init(&dctx->h, inp); + inp += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + acc += POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(&inp[0]); + dctx->s[1] = get_unaligned_le32(&inp[4]); + dctx->s[2] = get_unaligned_le32(&inp[8]); + dctx->s[3] = get_unaligned_le32(&inp[12]); + acc += POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + } + return acc; +} + +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int srclen) +{ + unsigned int bytes, used; + + if (unlikely(dctx->buflen)) { + bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + srclen -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE))) + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(srclen >= POLY1305_BLOCK_SIZE)) { + bytes = round_down(srclen, POLY1305_BLOCK_SIZE); + srclen -= bytes; + used = crypto_poly1305_setdctxkey(dctx, src, bytes); + if (likely(bytes - used)) + poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1); + src += bytes; + } + + if (unlikely(srclen)) { + dctx->buflen = srclen; + memcpy(dctx->buf, src, srclen); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_simd_emit(&dctx->h, dst, dctx->s); + memzero_explicit(dctx, sizeof(*dctx)); +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int crypto_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + *dctx = (struct poly1305_desc_ctx){}; + return 0; +} + +static int crypto_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + poly1305_update_arch(dctx, src, srclen); + return 0; +} + +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg alg = { + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_poly1305_init, + .update = crypto_poly1305_update, + .final = crypto_poly1305_final, + .descsize = sizeof(struct poly1305_desc_ctx), + .base = { + .cra_name = "poly1305", + .cra_driver_name = "poly1305-simd", + .cra_priority = 300, + .cra_blocksize = POLY1305_BLOCK_SIZE, + .cra_module = THIS_MODULE, + }, +}; + +static int __init poly1305_simd_mod_init(void) +{ + if (boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx); + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx2); + if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X) + static_branch_enable(&poly1305_use_avx512); + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; +} + +static void __exit poly1305_simd_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shash(&alg); +} + +module_init(poly1305_simd_mod_init); +module_exit(poly1305_simd_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jason A. Donenfeld "); +MODULE_DESCRIPTION("Poly1305 authenticator"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-simd"); diff --git a/arch/x86/crypto/polyval-clmulni_asm.S b/arch/x86/crypto/polyval-clmulni_asm.S new file mode 100644 index 000000000..a6ebe4e7d --- /dev/null +++ b/arch/x86/crypto/polyval-clmulni_asm.S @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2021 Google LLC + */ +/* + * This is an efficient implementation of POLYVAL using intel PCLMULQDQ-NI + * instructions. It works on 8 blocks at a time, by precomputing the first 8 + * keys powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation + * allows us to split finite field multiplication into two steps. + * + * In the first step, we consider h^i, m_i as normal polynomials of degree less + * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication + * is simply polynomial multiplication. + * + * In the second step, we compute the reduction of p(x) modulo the finite field + * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1. + * + * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where + * multiplication is finite field multiplication. The advantage is that the + * two-step process only requires 1 finite field reduction for every 8 + * polynomial multiplications. Further parallelism is gained by interleaving the + * multiplications and polynomial reductions. + */ + +#include +#include + +#define STRIDE_BLOCKS 8 + +#define GSTAR %xmm7 +#define PL %xmm8 +#define PH %xmm9 +#define TMP_XMM %xmm11 +#define LO %xmm12 +#define HI %xmm13 +#define MI %xmm14 +#define SUM %xmm15 + +#define KEY_POWERS %rdi +#define MSG %rsi +#define BLOCKS_LEFT %rdx +#define ACCUMULATOR %rcx +#define TMP %rax + +.section .rodata.cst16.gstar, "aM", @progbits, 16 +.align 16 + +.Lgstar: + .quad 0xc200000000000000, 0xc200000000000000 + +.text + +/* + * Performs schoolbook1_iteration on two lists of 128-bit polynomials of length + * count pointed to by MSG and KEY_POWERS. + */ +.macro schoolbook1 count + .set i, 0 + .rept (\count) + schoolbook1_iteration i 0 + .set i, (i +1) + .endr +.endm + +/* + * Computes the product of two 128-bit polynomials at the memory locations + * specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of + * the 256-bit product into LO, MI, HI. + * + * Given: + * X = [X_1 : X_0] + * Y = [Y_1 : Y_0] + * + * We compute: + * LO += X_0 * Y_0 + * MI += X_0 * Y_1 + X_1 * Y_0 + * HI += X_1 * Y_1 + * + * Later, the 256-bit result can be extracted as: + * [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] + * This step is done when computing the polynomial reduction for efficiency + * reasons. + * + * If xor_sum == 1, then also XOR the value of SUM into m_0. This avoids an + * extra multiplication of SUM and h^8. + */ +.macro schoolbook1_iteration i xor_sum + movups (16*\i)(MSG), %xmm0 + .if (\i == 0 && \xor_sum == 1) + pxor SUM, %xmm0 + .endif + vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2 + vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1 + vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3 + vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4 + vpxor %xmm2, MI, MI + vpxor %xmm1, LO, LO + vpxor %xmm4, HI, HI + vpxor %xmm3, MI, MI +.endm + +/* + * Performs the same computation as schoolbook1_iteration, except we expect the + * arguments to already be loaded into xmm0 and xmm1 and we set the result + * registers LO, MI, and HI directly rather than XOR'ing into them. + */ +.macro schoolbook1_noload + vpclmulqdq $0x01, %xmm0, %xmm1, MI + vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x00, %xmm0, %xmm1, LO + vpclmulqdq $0x11, %xmm0, %xmm1, HI + vpxor %xmm2, MI, MI +.endm + +/* + * Computes the 256-bit polynomial represented by LO, HI, MI. Stores + * the result in PL, PH. + * [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] + */ +.macro schoolbook2 + vpslldq $8, MI, PL + vpsrldq $8, MI, PH + pxor LO, PL + pxor HI, PH +.endm + +/* + * Computes the 128-bit reduction of PH : PL. Stores the result in dest. + * + * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) = + * x^128 + x^127 + x^126 + x^121 + 1. + * + * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the + * product of two 128-bit polynomials in Montgomery form. We need to reduce it + * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor + * of x^128, this product has two extra factors of x^128. To get it back into + * Montgomery form, we need to remove one of these factors by dividing by x^128. + * + * To accomplish both of these goals, we add multiples of g(x) that cancel out + * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low + * bits are zero, the polynomial division by x^128 can be done by right shifting. + * + * Since the only nonzero term in the low 64 bits of g(x) is the constant term, + * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can + * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 + + * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to + * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T + * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191. + * + * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits + * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1 + * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) * + * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 : + * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0). + * + * So our final computation is: + * T = T_1 : T_0 = g*(x) * P_0 + * V = V_1 : V_0 = g*(x) * (P_1 + T_0) + * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0 + * + * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0 + * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 : + * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V. + */ +.macro montgomery_reduction dest + vpclmulqdq $0x00, PL, GSTAR, TMP_XMM # TMP_XMM = T_1 : T_0 = P_0 * g*(x) + pshufd $0b01001110, TMP_XMM, TMP_XMM # TMP_XMM = T_0 : T_1 + pxor PL, TMP_XMM # TMP_XMM = P_1 + T_0 : P_0 + T_1 + pxor TMP_XMM, PH # PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1 + pclmulqdq $0x11, GSTAR, TMP_XMM # TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)] + vpxor TMP_XMM, PH, \dest +.endm + +/* + * Compute schoolbook multiplication for 8 blocks + * m_0h^8 + ... + m_7h^1 + * + * If reduce is set, also computes the montgomery reduction of the + * previous full_stride call and XORs with the first message block. + * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1. + * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0. + */ +.macro full_stride reduce + pxor LO, LO + pxor HI, HI + pxor MI, MI + + schoolbook1_iteration 7 0 + .if \reduce + vpclmulqdq $0x00, PL, GSTAR, TMP_XMM + .endif + + schoolbook1_iteration 6 0 + .if \reduce + pshufd $0b01001110, TMP_XMM, TMP_XMM + .endif + + schoolbook1_iteration 5 0 + .if \reduce + pxor PL, TMP_XMM + .endif + + schoolbook1_iteration 4 0 + .if \reduce + pxor TMP_XMM, PH + .endif + + schoolbook1_iteration 3 0 + .if \reduce + pclmulqdq $0x11, GSTAR, TMP_XMM + .endif + + schoolbook1_iteration 2 0 + .if \reduce + vpxor TMP_XMM, PH, SUM + .endif + + schoolbook1_iteration 1 0 + + schoolbook1_iteration 0 1 + + addq $(8*16), MSG + schoolbook2 +.endm + +/* + * Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS + */ +.macro partial_stride + mov BLOCKS_LEFT, TMP + shlq $4, TMP + addq $(16*STRIDE_BLOCKS), KEY_POWERS + subq TMP, KEY_POWERS + + movups (MSG), %xmm0 + pxor SUM, %xmm0 + movaps (KEY_POWERS), %xmm1 + schoolbook1_noload + dec BLOCKS_LEFT + addq $16, MSG + addq $16, KEY_POWERS + + test $4, BLOCKS_LEFT + jz .Lpartial4BlocksDone + schoolbook1 4 + addq $(4*16), MSG + addq $(4*16), KEY_POWERS +.Lpartial4BlocksDone: + test $2, BLOCKS_LEFT + jz .Lpartial2BlocksDone + schoolbook1 2 + addq $(2*16), MSG + addq $(2*16), KEY_POWERS +.Lpartial2BlocksDone: + test $1, BLOCKS_LEFT + jz .LpartialDone + schoolbook1 1 +.LpartialDone: + schoolbook2 + montgomery_reduction SUM +.endm + +/* + * Perform montgomery multiplication in GF(2^128) and store result in op1. + * + * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1 + * If op1, op2 are in montgomery form, this computes the montgomery + * form of op1*op2. + * + * void clmul_polyval_mul(u8 *op1, const u8 *op2); + */ +SYM_FUNC_START(clmul_polyval_mul) + FRAME_BEGIN + vmovdqa .Lgstar(%rip), GSTAR + movups (%rdi), %xmm0 + movups (%rsi), %xmm1 + schoolbook1_noload + schoolbook2 + montgomery_reduction SUM + movups SUM, (%rdi) + FRAME_END + RET +SYM_FUNC_END(clmul_polyval_mul) + +/* + * Perform polynomial evaluation as specified by POLYVAL. This computes: + * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1} + * where n=nblocks, h is the hash key, and m_i are the message blocks. + * + * rdi - pointer to precomputed key powers h^8 ... h^1 + * rsi - pointer to message blocks + * rdx - number of blocks to hash + * rcx - pointer to the accumulator + * + * void clmul_polyval_update(const struct polyval_tfm_ctx *keys, + * const u8 *in, size_t nblocks, u8 *accumulator); + */ +SYM_FUNC_START(clmul_polyval_update) + FRAME_BEGIN + vmovdqa .Lgstar(%rip), GSTAR + movups (ACCUMULATOR), SUM + subq $STRIDE_BLOCKS, BLOCKS_LEFT + js .LstrideLoopExit + full_stride 0 + subq $STRIDE_BLOCKS, BLOCKS_LEFT + js .LstrideLoopExitReduce +.LstrideLoop: + full_stride 1 + subq $STRIDE_BLOCKS, BLOCKS_LEFT + jns .LstrideLoop +.LstrideLoopExitReduce: + montgomery_reduction SUM +.LstrideLoopExit: + add $STRIDE_BLOCKS, BLOCKS_LEFT + jz .LskipPartial + partial_stride +.LskipPartial: + movups SUM, (ACCUMULATOR) + FRAME_END + RET +SYM_FUNC_END(clmul_polyval_update) diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c new file mode 100644 index 000000000..8fa58b0f3 --- /dev/null +++ b/arch/x86/crypto/polyval-clmulni_glue.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Glue code for POLYVAL using PCMULQDQ-NI + * + * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Copyright 2021 Google LLC + */ + +/* + * Glue code based on ghash-clmulni-intel_glue.c. + * + * This implementation of POLYVAL uses montgomery multiplication + * accelerated by PCLMULQDQ-NI to implement the finite field + * operations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define POLYVAL_ALIGN 16 +#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN) +#define POLYVAL_ALIGN_EXTRA ((POLYVAL_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) +#define POLYVAL_CTX_SIZE (sizeof(struct polyval_tfm_ctx) + POLYVAL_ALIGN_EXTRA) +#define NUM_KEY_POWERS 8 + +struct polyval_tfm_ctx { + /* + * These powers must be in the order h^8, ..., h^1. + */ + u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE] POLYVAL_ALIGN_ATTR; +}; + +struct polyval_desc_ctx { + u8 buffer[POLYVAL_BLOCK_SIZE]; + u32 bytes; +}; + +asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys, + const u8 *in, size_t nblocks, u8 *accumulator); +asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2); + +static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm) +{ + return PTR_ALIGN(crypto_shash_ctx(tfm), POLYVAL_ALIGN); +} + +static void internal_polyval_update(const struct polyval_tfm_ctx *keys, + const u8 *in, size_t nblocks, u8 *accumulator) +{ + if (likely(crypto_simd_usable())) { + kernel_fpu_begin(); + clmul_polyval_update(keys, in, nblocks, accumulator); + kernel_fpu_end(); + } else { + polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in, + nblocks, accumulator); + } +} + +static void internal_polyval_mul(u8 *op1, const u8 *op2) +{ + if (likely(crypto_simd_usable())) { + kernel_fpu_begin(); + clmul_polyval_mul(op1, op2); + kernel_fpu_end(); + } else { + polyval_mul_non4k(op1, op2); + } +} + +static int polyval_x86_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(tfm); + int i; + + if (keylen != POLYVAL_BLOCK_SIZE) + return -EINVAL; + + memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE); + + for (i = NUM_KEY_POWERS-2; i >= 0; i--) { + memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE); + internal_polyval_mul(tctx->key_powers[i], + tctx->key_powers[i+1]); + } + + return 0; +} + +static int polyval_x86_init(struct shash_desc *desc) +{ + struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int polyval_x86_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); + const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm); + u8 *pos; + unsigned int nblocks; + unsigned int n; + + if (dctx->bytes) { + n = min(srclen, dctx->bytes); + pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes; + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + internal_polyval_mul(dctx->buffer, + tctx->key_powers[NUM_KEY_POWERS-1]); + } + + while (srclen >= POLYVAL_BLOCK_SIZE) { + /* Allow rescheduling every 4K bytes. */ + nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; + internal_polyval_update(tctx, src, nblocks, dctx->buffer); + srclen -= nblocks * POLYVAL_BLOCK_SIZE; + src += nblocks * POLYVAL_BLOCK_SIZE; + } + + if (srclen) { + dctx->bytes = POLYVAL_BLOCK_SIZE - srclen; + pos = dctx->buffer; + while (srclen--) + *pos++ ^= *src++; + } + + return 0; +} + +static int polyval_x86_final(struct shash_desc *desc, u8 *dst) +{ + struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); + const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm); + + if (dctx->bytes) { + internal_polyval_mul(dctx->buffer, + tctx->key_powers[NUM_KEY_POWERS-1]); + } + + memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg polyval_alg = { + .digestsize = POLYVAL_DIGEST_SIZE, + .init = polyval_x86_init, + .update = polyval_x86_update, + .final = polyval_x86_final, + .setkey = polyval_x86_setkey, + .descsize = sizeof(struct polyval_desc_ctx), + .base = { + .cra_name = "polyval", + .cra_driver_name = "polyval-clmulni", + .cra_priority = 200, + .cra_blocksize = POLYVAL_BLOCK_SIZE, + .cra_ctxsize = POLYVAL_CTX_SIZE, + .cra_module = THIS_MODULE, + }, +}; + +__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = { + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); + +static int __init polyval_clmulni_mod_init(void) +{ + if (!x86_match_cpu(pcmul_cpu_id)) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_AVX)) + return -ENODEV; + + return crypto_register_shash(&polyval_alg); +} + +static void __exit polyval_clmulni_mod_exit(void) +{ + crypto_unregister_shash(&polyval_alg); +} + +module_init(polyval_clmulni_mod_init); +module_exit(polyval_clmulni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI"); +MODULE_ALIAS_CRYPTO("polyval"); +MODULE_ALIAS_CRYPTO("polyval-clmulni"); diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 000000000..82f2313f5 --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -0,0 +1,713 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2011-2013 Jussi Kivilinna + */ + +#include +#include +#include "glue_helper-asm-avx.S" + +.file "serpent-avx-x86_64-asm_64.S" + +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.text + +#define CTX %rdi + +/********************************************************************** + 8-way AVX serpent + **********************************************************************/ +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 +#define RE1 %xmm4 + +#define tp %xmm5 + +#define RA2 %xmm6 +#define RB2 %xmm7 +#define RC2 %xmm8 +#define RD2 %xmm9 +#define RE2 %xmm10 + +#define RNOT %xmm11 + +#define RK0 %xmm12 +#define RK1 %xmm13 +#define RK2 %xmm14 +#define RK3 %xmm15 + + +#define S0_1(x0, x1, x2, x3, x4) \ + vpor x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x4; \ + vpxor RNOT, x4, x4; \ + vpxor x1, tp, x3; \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpxor x0, x2, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + vpxor x3, x0, x0; \ + vpor x0, x4, x4; \ + vpxor x2, x0, x0; \ + vpand x1, x2, x2; \ + vpxor x2, x3, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x4, x2, x2; \ + vpxor x2, x1, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, tp; \ + vpxor x3, x0, x0; \ + vpxor RNOT, x3, x3; \ + vpand tp, x1, x4; \ + vpor tp, x0, x0; \ + vpxor x2, x3, x3; \ + vpxor x3, x0, x0; \ + vpxor x3, tp, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpor x4, x1, x1; \ + vpxor x2, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x2, x2; \ + vpor x0, x1, x1; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x0, x0; \ + vpxor x1, x4, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, tp; \ + vpxor x3, tp, tp; \ + vpor x0, x3, x3; \ + vpxor x1, x2, x2; \ + vpxor x1, x3, x3; \ + vpand tp, x1, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + vpxor x2, tp, tp; \ + vpand x3, x2, x2; \ + vpor x1, x3, x3; \ + vpxor RNOT, tp, tp; \ + vpxor tp, x3, x3; \ + vpxor tp, x0, x4; \ + vpxor x2, tp, x0; \ + vpor x2, x1, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, tp; \ + vpor x0, x3, x3; \ + vpand x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpxor tp, x2, x2; \ + vpand x3, tp, x1; \ + vpxor x3, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x4, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpand x3, x0, x0; \ + vpand x4, x3, x3; \ + vpxor x2, x3, x3; \ + vpor x1, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x4, x4; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor x1, x0, x0; \ + vpxor tp, x3, x4; \ + vpor x0, x2, x2; \ + vpxor x1, x2, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpand x2, x4, x4; \ + vpxor tp, x2, x2; \ + vpxor x0, x4, x4; \ + vpor x1, tp, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x0, x3, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + vpor x0, x1, tp; \ + vpxor tp, x2, x2; \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpand x4, tp, x1; \ + vpor x3, x4, x4; \ + vpxor x0, x4, x4; +#define S5_2(x0, x1, x2, x3, x4) \ + vpand x3, x0, x0; \ + vpxor x3, x1, x1; \ + vpxor x2, x3, x3; \ + vpxor x1, x0, x0; \ + vpand x4, x2, x2; \ + vpxor x2, x1, x1; \ + vpand x0, x2, x2; \ + vpxor x2, x3, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + vpxor x0, x3, x3; \ + vpxor x2, x1, tp; \ + vpxor x0, x2, x2; \ + vpand x3, x0, x0; \ + vpor x3, tp, tp; \ + vpxor RNOT, x1, x4; \ + vpxor tp, x0, x0; \ + vpxor x2, tp, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x4, x4; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x3, x3; \ + vpxor x2, x1, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x1, tp; \ + vpxor RNOT, x0, x0; \ + vpand x2, tp, x1; \ + vpxor x3, x1, x1; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x4; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x3; \ + vpor x1, x0, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + vpand x0, x2, x2; \ + vpxor x4, x0, x0; \ + vpxor x3, x4, x4; \ + vpand x0, x3, x3; \ + vpxor x1, x4, x4; \ + vpxor x4, x2, x2; \ + vpxor x1, x3, x3; \ + vpor x0, x4, x4; \ + vpxor x1, x4, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpor x1, x3, tp; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpxor tp, x2, x2; \ + vpxor x0, tp, x3; \ + vpand x1, x0, x0; \ + vpxor x2, x0, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + vpand x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x3, x2, x2; \ + vpxor x3, x1, x1; \ + vpand x0, x3, x3; \ + vpxor x0, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x4, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, tp; \ + vpxor RNOT, x2, x2; \ + vpor x1, x0, x4; \ + vpxor x3, x4, x4; \ + vpand x1, x3, x3; \ + vpxor x2, x1, x1; \ + vpand x4, x2, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + vpxor x1, x4, x4; \ + vpor x3, x1, x1; \ + vpxor tp, x3, x3; \ + vpxor tp, x2, x2; \ + vpor x4, tp, x0; \ + vpxor x4, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor x1, x4, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpxor RNOT, x3, tp; \ + vpor x2, tp, tp; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x4; \ + vpxor x1, tp, x3; \ + vpor x2, x1, x1; \ + vpxor x0, x2, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + vpxor x4, x1, x1; \ + vpor x3, x4, x4; \ + vpxor x3, x2, x2; \ + vpxor x2, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpand x2, x1, tp; \ + vpxor x0, tp, tp; \ + vpor x1, x0, x0; \ + vpxor x3, x1, x4; \ + vpxor x3, x0, x0; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x1, x0, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + vpxor x3, x2, x2; \ + vpand x1, x0, tp; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor RNOT, x0, x4; \ + vpxor tp, x1, x1; \ + vpxor x2, tp, x0; \ + vpand x4, x2, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + vpxor x0, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x0, x0; \ + vpand x2, x3, x3; \ + vpxor x3, x4, x4; \ + vpxor x1, x3, x3; \ + vpand x0, x1, x1; \ + vpxor x1, x4, x4; \ + vpxor x3, x0, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + vpor x2, x1, tp; \ + vpxor x1, x2, x2; \ + vpxor x3, tp, tp; \ + vpand x1, x3, x3; \ + vpxor x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x3, x3; \ + vpor x0, x2, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + vpxor tp, x1, x4; \ + vpxor x4, x2, x2; \ + vpand x0, x4, x4; \ + vpxor tp, x0, x0; \ + vpxor x3, tp, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpxor x2, x0, x0; \ + vpxor x4, x2, x2; \ + vpxor x3, x4, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + vpxor x2, x0, x0; \ + vpand x3, x0, tp; \ + vpxor x3, x2, x2; \ + vpxor x2, tp, tp; \ + vpxor x1, x3, x3; \ + vpor x0, x2, x2; \ + vpxor x3, x2, x2; \ + vpand tp, x3, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + vpxor RNOT, tp, tp; \ + vpxor x1, x3, x3; \ + vpand x2, x1, x1; \ + vpxor tp, x0, x4; \ + vpxor x4, x3, x3; \ + vpxor x2, x4, x4; \ + vpxor x1, tp, x0; \ + vpxor x0, x2, x2; + +#define SI7_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x2, x0, x0; \ + vpor x3, x2, x2; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpor tp, x1, x1; \ + vpxor x0, x4, x4; \ + vpand x2, x0, x0; \ + vpxor x1, x0, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + vpand x2, x1, x1; \ + vpxor x2, tp, x3; \ + vpxor x3, x4, x4; \ + vpand x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor x4, x1, x1; \ + vpxor x4, x3, x3; \ + vpand x0, x4, x4; \ + vpxor x2, x4, x4; + +#define get_key(i, j, t) \ + vbroadcastss (4*(i)+(j))*4(CTX), t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + vpslld $13, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $13, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $1, x1 ## 1, x4 ## 1; \ + vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + get_key(i, 1, RK1); \ + vpslld $1, x1 ## 2, x4 ## 2; \ + vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + get_key(i, 3, RK3); \ + vpslld $7, x3 ## 1, x4 ## 1; \ + vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + vpslld $7, x3 ## 2, x4 ## 2; \ + vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpslld $5, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $22, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpslld $5, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $22, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpsrld $5, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpsrld $22, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpsrld $5, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpsrld $22, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $1, x1 ## 1, x4 ## 1; \ + vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpsrld $1, x1 ## 2, x4 ## 2; \ + vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpsrld $7, x3 ## 1, x4 ## 1; \ + vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $7, x3 ## 2, x4 ## 2; \ + vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $13, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $3, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $13, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $3, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 3, RK3); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +.align 8 +SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) + /* input: + * %rdi: ctx, CTX + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(__serpent_enc_blk8_avx) + +.align 8 +SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) + /* input: + * %rdi: ctx, CTX + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks + * output: + * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(__serpent_dec_blk8_avx) + +SYM_FUNC_START(serpent_ecb_enc_8way_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_enc_blk8_avx; + + store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END + RET; +SYM_FUNC_END(serpent_ecb_enc_8way_avx) + +SYM_FUNC_START(serpent_ecb_dec_8way_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk8_avx; + + store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + FRAME_END + RET; +SYM_FUNC_END(serpent_ecb_dec_8way_avx) + +SYM_FUNC_START(serpent_cbc_dec_8way_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk8_avx; + + store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + FRAME_END + RET; +SYM_FUNC_END(serpent_cbc_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx.h b/arch/x86/crypto/serpent-avx.h new file mode 100644 index 000000000..23f3361a0 --- /dev/null +++ b/arch/x86/crypto/serpent-avx.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_SERPENT_AVX_H +#define ASM_X86_SERPENT_AVX_H + +#include +#include +#include + +struct crypto_skcipher; + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, + const u8 *src); + +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, + const u8 *src); + +#endif diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S new file mode 100644 index 000000000..8ea34c9b9 --- /dev/null +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -0,0 +1,726 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * x86_64/AVX2 assembler optimized version of Serpent + * + * Copyright © 2012-2013 Jussi Kivilinna + * + * Based on AVX assembler implementation of Serpent by: + * Copyright © 2012 Johannes Goetzfried + * + */ + +#include +#include +#include "glue_helper-asm-avx2.S" + +.file "serpent-avx2-asm_64.S" + +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.text + +#define CTX %rdi + +#define RNOT %ymm0 +#define tp %ymm1 + +#define RA1 %ymm2 +#define RA2 %ymm3 +#define RB1 %ymm4 +#define RB2 %ymm5 +#define RC1 %ymm6 +#define RC2 %ymm7 +#define RD1 %ymm8 +#define RD2 %ymm9 +#define RE1 %ymm10 +#define RE2 %ymm11 + +#define RK0 %ymm12 +#define RK1 %ymm13 +#define RK2 %ymm14 +#define RK3 %ymm15 + +#define RK0x %xmm12 +#define RK1x %xmm13 +#define RK2x %xmm14 +#define RK3x %xmm15 + +#define S0_1(x0, x1, x2, x3, x4) \ + vpor x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x4; \ + vpxor RNOT, x4, x4; \ + vpxor x1, tp, x3; \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpxor x0, x2, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + vpxor x3, x0, x0; \ + vpor x0, x4, x4; \ + vpxor x2, x0, x0; \ + vpand x1, x2, x2; \ + vpxor x2, x3, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x4, x2, x2; \ + vpxor x2, x1, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, tp; \ + vpxor x3, x0, x0; \ + vpxor RNOT, x3, x3; \ + vpand tp, x1, x4; \ + vpor tp, x0, x0; \ + vpxor x2, x3, x3; \ + vpxor x3, x0, x0; \ + vpxor x3, tp, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpor x4, x1, x1; \ + vpxor x2, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x2, x2; \ + vpor x0, x1, x1; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x0, x0; \ + vpxor x1, x4, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, tp; \ + vpxor x3, tp, tp; \ + vpor x0, x3, x3; \ + vpxor x1, x2, x2; \ + vpxor x1, x3, x3; \ + vpand tp, x1, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + vpxor x2, tp, tp; \ + vpand x3, x2, x2; \ + vpor x1, x3, x3; \ + vpxor RNOT, tp, tp; \ + vpxor tp, x3, x3; \ + vpxor tp, x0, x4; \ + vpxor x2, tp, x0; \ + vpor x2, x1, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, tp; \ + vpor x0, x3, x3; \ + vpand x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpxor tp, x2, x2; \ + vpand x3, tp, x1; \ + vpxor x3, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x4, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpand x3, x0, x0; \ + vpand x4, x3, x3; \ + vpxor x2, x3, x3; \ + vpor x1, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x4, x4; \ + vpxor x3, x0, x0; \ + vpxor x2, x3, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x3, x0, x0; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor x1, x0, x0; \ + vpxor tp, x3, x4; \ + vpor x0, x2, x2; \ + vpxor x1, x2, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + vpand x0, x1, x1; \ + vpxor x4, x1, x1; \ + vpand x2, x4, x4; \ + vpxor tp, x2, x2; \ + vpxor x0, x4, x4; \ + vpor x1, tp, x3; \ + vpxor RNOT, x1, x1; \ + vpxor x0, x3, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + vpor x0, x1, tp; \ + vpxor tp, x2, x2; \ + vpxor RNOT, x3, x3; \ + vpxor x0, x1, x4; \ + vpxor x2, x0, x0; \ + vpand x4, tp, x1; \ + vpor x3, x4, x4; \ + vpxor x0, x4, x4; +#define S5_2(x0, x1, x2, x3, x4) \ + vpand x3, x0, x0; \ + vpxor x3, x1, x1; \ + vpxor x2, x3, x3; \ + vpxor x1, x0, x0; \ + vpand x4, x2, x2; \ + vpxor x2, x1, x1; \ + vpand x0, x2, x2; \ + vpxor x2, x3, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + vpxor x0, x3, x3; \ + vpxor x2, x1, tp; \ + vpxor x0, x2, x2; \ + vpand x3, x0, x0; \ + vpor x3, tp, tp; \ + vpxor RNOT, x1, x4; \ + vpxor tp, x0, x0; \ + vpxor x2, tp, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; \ + vpand x0, x2, x2; \ + vpxor x1, x4, x4; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x3, x3; \ + vpxor x2, x1, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + vpxor RNOT, x1, tp; \ + vpxor RNOT, x0, x0; \ + vpand x2, tp, x1; \ + vpxor x3, x1, x1; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x4; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x3; \ + vpor x1, x0, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + vpand x0, x2, x2; \ + vpxor x4, x0, x0; \ + vpxor x3, x4, x4; \ + vpand x0, x3, x3; \ + vpxor x1, x4, x4; \ + vpxor x4, x2, x2; \ + vpxor x1, x3, x3; \ + vpor x0, x4, x4; \ + vpxor x1, x4, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + vpxor x0, x1, x1; \ + vpor x1, x3, tp; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpxor tp, x2, x2; \ + vpxor x0, tp, x3; \ + vpand x1, x0, x0; \ + vpxor x2, x0, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + vpand x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x3, x2, x2; \ + vpxor x3, x1, x1; \ + vpand x0, x3, x3; \ + vpxor x0, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x4, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, tp; \ + vpxor RNOT, x2, x2; \ + vpor x1, x0, x4; \ + vpxor x3, x4, x4; \ + vpand x1, x3, x3; \ + vpxor x2, x1, x1; \ + vpand x4, x2, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + vpxor x1, x4, x4; \ + vpor x3, x1, x1; \ + vpxor tp, x3, x3; \ + vpxor tp, x2, x2; \ + vpor x4, tp, x0; \ + vpxor x4, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor x1, x4, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpxor RNOT, x3, tp; \ + vpor x2, tp, tp; \ + vpxor x3, x2, x2; \ + vpxor x0, x3, x4; \ + vpxor x1, tp, x3; \ + vpor x2, x1, x1; \ + vpxor x0, x2, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + vpxor x4, x1, x1; \ + vpor x3, x4, x4; \ + vpxor x3, x2, x2; \ + vpxor x2, x4, x4; \ + vpand x1, x2, x2; \ + vpxor x3, x2, x2; \ + vpxor x4, x3, x3; \ + vpxor x0, x4, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + vpxor x1, x2, x2; \ + vpand x2, x1, tp; \ + vpxor x0, tp, tp; \ + vpor x1, x0, x0; \ + vpxor x3, x1, x4; \ + vpxor x3, x0, x0; \ + vpor tp, x3, x3; \ + vpxor x2, tp, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + vpxor x3, x1, x1; \ + vpxor x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpand x1, x3, x3; \ + vpxor x0, x1, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x1, x0, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + vpxor x3, x2, x2; \ + vpand x1, x0, tp; \ + vpxor x2, tp, tp; \ + vpor x3, x2, x2; \ + vpxor RNOT, x0, x4; \ + vpxor tp, x1, x1; \ + vpxor x2, tp, x0; \ + vpand x4, x2, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + vpxor x0, x2, x2; \ + vpor x4, x0, x0; \ + vpxor x3, x0, x0; \ + vpand x2, x3, x3; \ + vpxor x3, x4, x4; \ + vpxor x1, x3, x3; \ + vpand x0, x1, x1; \ + vpxor x1, x4, x4; \ + vpxor x3, x0, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + vpor x2, x1, tp; \ + vpxor x1, x2, x2; \ + vpxor x3, tp, tp; \ + vpand x1, x3, x3; \ + vpxor x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor RNOT, x0, x0; \ + vpxor x2, x3, x3; \ + vpor x0, x2, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + vpxor tp, x1, x4; \ + vpxor x4, x2, x2; \ + vpand x0, x4, x4; \ + vpxor tp, x0, x0; \ + vpxor x3, tp, x1; \ + vpand x2, x0, x0; \ + vpxor x3, x2, x2; \ + vpxor x2, x0, x0; \ + vpxor x4, x2, x2; \ + vpxor x3, x4, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + vpxor x2, x0, x0; \ + vpand x3, x0, tp; \ + vpxor x3, x2, x2; \ + vpxor x2, tp, tp; \ + vpxor x1, x3, x3; \ + vpor x0, x2, x2; \ + vpxor x3, x2, x2; \ + vpand tp, x3, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + vpxor RNOT, tp, tp; \ + vpxor x1, x3, x3; \ + vpand x2, x1, x1; \ + vpxor tp, x0, x4; \ + vpxor x4, x3, x3; \ + vpxor x2, x4, x4; \ + vpxor x1, tp, x0; \ + vpxor x0, x2, x2; + +#define SI7_1(x0, x1, x2, x3, x4) \ + vpand x0, x3, tp; \ + vpxor x2, x0, x0; \ + vpor x3, x2, x2; \ + vpxor x1, x3, x4; \ + vpxor RNOT, x0, x0; \ + vpor tp, x1, x1; \ + vpxor x0, x4, x4; \ + vpand x2, x0, x0; \ + vpxor x1, x0, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + vpand x2, x1, x1; \ + vpxor x2, tp, x3; \ + vpxor x3, x4, x4; \ + vpand x3, x2, x2; \ + vpor x0, x3, x3; \ + vpxor x4, x1, x1; \ + vpxor x4, x3, x3; \ + vpand x0, x4, x4; \ + vpxor x2, x4, x4; + +#define get_key(i,j,t) \ + vpbroadcastd (4*(i)+(j))*4(CTX), t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + vpslld $13, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $13, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $1, x1 ## 1, x4 ## 1; \ + vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + get_key(i, 1, RK1); \ + vpslld $1, x1 ## 2, x4 ## 2; \ + vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + get_key(i, 3, RK3); \ + vpslld $7, x3 ## 1, x4 ## 1; \ + vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + vpslld $7, x3 ## 2, x4 ## 2; \ + vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpslld $5, x0 ## 1, x4 ## 1; \ + vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $22, x2 ## 1, x4 ## 1; \ + vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpslld $5, x0 ## 2, x4 ## 2; \ + vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $22, x2 ## 2, x4 ## 2; \ + vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + vpxor RK0, x0 ## 1, x0 ## 1; \ + vpxor RK2, x2 ## 1, x2 ## 1; \ + vpsrld $5, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor RK3, x3 ## 1, x3 ## 1; \ + vpxor RK1, x1 ## 1, x1 ## 1; \ + vpsrld $22, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ + vpxor RK0, x0 ## 2, x0 ## 2; \ + vpxor RK2, x2 ## 2, x2 ## 2; \ + vpsrld $5, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor RK3, x3 ## 2, x3 ## 2; \ + vpxor RK1, x1 ## 2, x1 ## 2; \ + vpsrld $22, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ + vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ + vpslld $7, x1 ## 1, x4 ## 1; \ + vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $1, x1 ## 1, x4 ## 1; \ + vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ + vpor x4 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ + vpslld $7, x1 ## 2, x4 ## 2; \ + vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ + vpsrld $1, x1 ## 2, x4 ## 2; \ + vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ + vpor x4 ## 2, x1 ## 2, x1 ## 2; \ + vpsrld $7, x3 ## 1, x4 ## 1; \ + vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ + vpor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ + vpslld $3, x0 ## 1, x4 ## 1; \ + vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $7, x3 ## 2, x4 ## 2; \ + vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ + vpor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ + vpslld $3, x0 ## 2, x4 ## 2; \ + vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $13, x0 ## 1, x4 ## 1; \ + vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ + vpor x4 ## 1, x0 ## 1, x0 ## 1; \ + vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ + vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ + vpsrld $3, x2 ## 1, x4 ## 1; \ + vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ + vpor x4 ## 1, x2 ## 1, x2 ## 1; \ + vpsrld $13, x0 ## 2, x4 ## 2; \ + vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ + vpor x4 ## 2, x0 ## 2, x0 ## 2; \ + vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ + vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ + vpsrld $3, x2 ## 2, x4 ## 2; \ + vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ + vpor x4 ## 2, x2 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 3, RK3); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +.align 8 +SYM_FUNC_START_LOCAL(__serpent_enc_blk16) + /* input: + * %rdi: ctx, CTX + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(__serpent_enc_blk16) + +.align 8 +SYM_FUNC_START_LOCAL(__serpent_dec_blk16) + /* input: + * %rdi: ctx, CTX + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext + * output: + * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext + */ + + vpcmpeqd RNOT, RNOT, RNOT; + + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(__serpent_dec_blk16) + +SYM_FUNC_START(serpent_ecb_enc_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + vzeroupper; + + load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_enc_blk16; + + store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + vzeroupper; + + FRAME_END + RET; +SYM_FUNC_END(serpent_ecb_enc_16way) + +SYM_FUNC_START(serpent_ecb_dec_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + vzeroupper; + + load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk16; + + store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + vzeroupper; + + FRAME_END + RET; +SYM_FUNC_END(serpent_ecb_dec_16way) + +SYM_FUNC_START(serpent_cbc_dec_16way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + vzeroupper; + + load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk16; + + store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2, + RK0); + + vzeroupper; + + FRAME_END + RET; +SYM_FUNC_END(serpent_cbc_dec_16way) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S new file mode 100644 index 000000000..8ccb03ad7 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -0,0 +1,616 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Serpent Cipher 4-way parallel algorithm (i586/SSE2) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * Based on crypto/serpent.c by + * Copyright (C) 2002 Dag Arne Osvik + * 2003 Herbert Valerio Riedel + */ + +#include + +.file "serpent-sse2-i586-asm_32.S" +.text + +#define arg_ctx 4 +#define arg_dst 8 +#define arg_src 12 +#define arg_xor 16 + +/********************************************************************** + 4-way SSE2 serpent + **********************************************************************/ +#define CTX %edx + +#define RA %xmm0 +#define RB %xmm1 +#define RC %xmm2 +#define RD %xmm3 +#define RE %xmm4 + +#define RT0 %xmm5 +#define RT1 %xmm6 + +#define RNOT %xmm7 + +#define get_key(i, j, t) \ + movd (4*(i)+(j))*4(CTX), t; \ + pshufd $0, t, t; + +#define K(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, x4); \ + get_key(i, 1, RT0); \ + get_key(i, 2, RT1); \ + pxor x4, x0; \ + pxor RT0, x1; \ + pxor RT1, x2; \ + get_key(i, 3, x4); \ + pxor x4, x3; + +#define LK(x0, x1, x2, x3, x4, i) \ + movdqa x0, x4; \ + pslld $13, x0; \ + psrld $(32 - 13), x4; \ + por x4, x0; \ + pxor x0, x1; \ + movdqa x2, x4; \ + pslld $3, x2; \ + psrld $(32 - 3), x4; \ + por x4, x2; \ + pxor x2, x1; \ + movdqa x1, x4; \ + pslld $1, x1; \ + psrld $(32 - 1), x4; \ + por x4, x1; \ + movdqa x0, x4; \ + pslld $3, x4; \ + pxor x2, x3; \ + pxor x4, x3; \ + movdqa x3, x4; \ + pslld $7, x3; \ + psrld $(32 - 7), x4; \ + por x4, x3; \ + movdqa x1, x4; \ + pslld $7, x4; \ + pxor x1, x0; \ + pxor x3, x0; \ + pxor x3, x2; \ + pxor x4, x2; \ + movdqa x0, x4; \ + get_key(i, 1, RT0); \ + pxor RT0, x1; \ + get_key(i, 3, RT0); \ + pxor RT0, x3; \ + pslld $5, x0; \ + psrld $(32 - 5), x4; \ + por x4, x0; \ + movdqa x2, x4; \ + pslld $22, x2; \ + psrld $(32 - 22), x4; \ + por x4, x2; \ + get_key(i, 0, RT0); \ + pxor RT0, x0; \ + get_key(i, 2, RT0); \ + pxor RT0, x2; + +#define KL(x0, x1, x2, x3, x4, i) \ + K(x0, x1, x2, x3, x4, i); \ + movdqa x0, x4; \ + psrld $5, x0; \ + pslld $(32 - 5), x4; \ + por x4, x0; \ + movdqa x2, x4; \ + psrld $22, x2; \ + pslld $(32 - 22), x4; \ + por x4, x2; \ + pxor x3, x2; \ + pxor x3, x0; \ + movdqa x1, x4; \ + pslld $7, x4; \ + pxor x1, x0; \ + pxor x4, x2; \ + movdqa x1, x4; \ + psrld $1, x1; \ + pslld $(32 - 1), x4; \ + por x4, x1; \ + movdqa x3, x4; \ + psrld $7, x3; \ + pslld $(32 - 7), x4; \ + por x4, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pslld $3, x4; \ + pxor x4, x3; \ + movdqa x0, x4; \ + psrld $13, x0; \ + pslld $(32 - 13), x4; \ + por x4, x0; \ + pxor x2, x1; \ + pxor x2, x3; \ + movdqa x2, x4; \ + psrld $3, x2; \ + pslld $(32 - 3), x4; \ + por x4, x2; + +#define S0(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + por x0, x3; \ + pxor x4, x0; \ + pxor x2, x4; \ + pxor RNOT, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x4, x1; \ + pxor x0, x2; \ + pxor x3, x0; \ + por x0, x4; \ + pxor x2, x0; \ + pand x1, x2; \ + pxor x2, x3; \ + pxor RNOT, x1; \ + pxor x4, x2; \ + pxor x2, x1; + +#define S1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x1; \ + pxor x3, x0; \ + pxor RNOT, x3; \ + pand x1, x4; \ + por x1, x0; \ + pxor x2, x3; \ + pxor x3, x0; \ + pxor x3, x1; \ + pxor x4, x3; \ + por x4, x1; \ + pxor x2, x4; \ + pand x0, x2; \ + pxor x1, x2; \ + por x0, x1; \ + pxor RNOT, x0; \ + pxor x2, x0; \ + pxor x1, x4; + +#define S2(x0, x1, x2, x3, x4) \ + pxor RNOT, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pand x2, x0; \ + pxor x3, x0; \ + por x4, x3; \ + pxor x1, x2; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x2, x0; \ + pand x3, x2; \ + por x1, x3; \ + pxor RNOT, x0; \ + pxor x0, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + por x2, x1; + +#define S3(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x3, x1; \ + por x0, x3; \ + pand x0, x4; \ + pxor x2, x0; \ + pxor x1, x2; \ + pand x3, x1; \ + pxor x3, x2; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x0, x1; \ + pand x3, x0; \ + pand x4, x3; \ + pxor x2, x3; \ + por x1, x4; \ + pand x1, x2; \ + pxor x3, x4; \ + pxor x3, x0; \ + pxor x2, x3; + +#define S4(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x4, x0; \ + pxor x2, x3; \ + por x4, x2; \ + pxor x1, x0; \ + pxor x3, x4; \ + por x0, x2; \ + pxor x1, x2; \ + pand x0, x1; \ + pxor x4, x1; \ + pand x2, x4; \ + pxor x3, x2; \ + pxor x0, x4; \ + por x1, x3; \ + pxor RNOT, x1; \ + pxor x0, x3; + +#define S5(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x0, x1; \ + pxor x1, x2; \ + pxor RNOT, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + pand x4, x1; \ + por x3, x4; \ + pxor x0, x4; \ + pand x3, x0; \ + pxor x3, x1; \ + pxor x2, x3; \ + pxor x1, x0; \ + pand x4, x2; \ + pxor x2, x1; \ + pand x0, x2; \ + pxor x2, x3; + +#define S6(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x3; \ + pxor x2, x1; \ + pxor x0, x2; \ + pand x3, x0; \ + por x3, x1; \ + pxor RNOT, x4; \ + pxor x1, x0; \ + pxor x2, x1; \ + pxor x4, x3; \ + pxor x0, x4; \ + pand x0, x2; \ + pxor x1, x4; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x3; \ + pxor x2, x1; + +#define S7(x0, x1, x2, x3, x4) \ + pxor RNOT, x1; \ + movdqa x1, x4; \ + pxor RNOT, x0; \ + pand x2, x1; \ + pxor x3, x1; \ + por x4, x3; \ + pxor x2, x4; \ + pxor x3, x2; \ + pxor x0, x3; \ + por x1, x0; \ + pand x0, x2; \ + pxor x4, x0; \ + pxor x3, x4; \ + pand x0, x3; \ + pxor x1, x4; \ + pxor x4, x2; \ + pxor x1, x3; \ + por x0, x4; \ + pxor x1, x4; + +#define SI0(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pxor x0, x1; \ + por x1, x3; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + pxor x3, x2; \ + pxor x0, x3; \ + pand x1, x0; \ + pxor x2, x0; \ + pand x3, x2; \ + pxor x4, x3; \ + pxor x3, x2; \ + pxor x3, x1; \ + pand x0, x3; \ + pxor x0, x1; \ + pxor x2, x0; \ + pxor x3, x4; + +#define SI1(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + movdqa x0, x4; \ + pxor x2, x0; \ + pxor RNOT, x2; \ + por x1, x4; \ + pxor x3, x4; \ + pand x1, x3; \ + pxor x2, x1; \ + pand x4, x2; \ + pxor x1, x4; \ + por x3, x1; \ + pxor x0, x3; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x4, x2; \ + pxor x0, x1; \ + pxor x1, x4; + +#define SI2(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x3, x4; \ + pxor RNOT, x3; \ + por x2, x3; \ + pxor x4, x2; \ + pxor x0, x4; \ + pxor x1, x3; \ + por x2, x1; \ + pxor x0, x2; \ + pxor x4, x1; \ + por x3, x4; \ + pxor x3, x2; \ + pxor x2, x4; \ + pand x1, x2; \ + pxor x3, x2; \ + pxor x4, x3; \ + pxor x0, x4; + +#define SI3(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x1, x4; \ + pand x2, x1; \ + pxor x0, x1; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x3, x0; \ + por x1, x3; \ + pxor x2, x1; \ + pxor x3, x1; \ + pxor x2, x0; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x1; \ + pand x2, x0; \ + pxor x3, x4; \ + pxor x0, x3; \ + pxor x1, x0; + +#define SI4(x0, x1, x2, x3, x4) \ + pxor x3, x2; \ + movdqa x0, x4; \ + pand x1, x0; \ + pxor x2, x0; \ + por x3, x2; \ + pxor RNOT, x4; \ + pxor x0, x1; \ + pxor x2, x0; \ + pand x4, x2; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x3, x0; \ + pand x2, x3; \ + pxor x3, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x1, x4; \ + pxor x3, x0; + +#define SI5(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x2, x1; \ + pxor x4, x2; \ + pxor x3, x1; \ + pand x4, x3; \ + pxor x3, x2; \ + por x0, x3; \ + pxor RNOT, x0; \ + pxor x2, x3; \ + por x0, x2; \ + pxor x1, x4; \ + pxor x4, x2; \ + pand x0, x4; \ + pxor x1, x0; \ + pxor x3, x1; \ + pand x2, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x4, x2; \ + pxor x3, x4; + +#define SI6(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + movdqa x0, x4; \ + pand x3, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x1, x3; \ + por x4, x2; \ + pxor x3, x2; \ + pand x0, x3; \ + pxor RNOT, x0; \ + pxor x1, x3; \ + pand x2, x1; \ + pxor x0, x4; \ + pxor x4, x3; \ + pxor x2, x4; \ + pxor x1, x0; \ + pxor x0, x2; + +#define SI7(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x2, x0; \ + por x4, x2; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + por x3, x1; \ + pxor x0, x4; \ + pand x2, x0; \ + pxor x1, x0; \ + pand x2, x1; \ + pxor x2, x3; \ + pxor x3, x4; \ + pand x3, x2; \ + por x0, x3; \ + pxor x4, x1; \ + pxor x4, x3; \ + pand x0, x4; \ + pxor x2, x4; + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + movdqa x0, t2; \ + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + movdqu (0*4*4)(in), x0; \ + movdqu (1*4*4)(in), x1; \ + movdqu (2*4*4)(in), x2; \ + movdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu x0, (0*4*4)(out); \ + movdqu x1, (1*4*4)(out); \ + movdqu x2, (2*4*4)(out); \ + movdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu (0*4*4)(out), t0; \ + pxor t0, x0; \ + movdqu x0, (0*4*4)(out); \ + movdqu (1*4*4)(out), t0; \ + pxor t0, x1; \ + movdqu x1, (1*4*4)(out); \ + movdqu (2*4*4)(out), t0; \ + pxor t0, x2; \ + movdqu x2, (2*4*4)(out); \ + movdqu (3*4*4)(out), t0; \ + pxor t0, x3; \ + movdqu x3, (3*4*4)(out); + +SYM_FUNC_START(__serpent_enc_blk_4way) + /* input: + * arg_ctx(%esp): ctx, CTX + * arg_dst(%esp): dst + * arg_src(%esp): src + * arg_xor(%esp): bool, if true: xor output + */ + + pcmpeqd RNOT, RNOT; + + movl arg_ctx(%esp), CTX; + + movl arg_src(%esp), %eax; + read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + K(RA, RB, RC, RD, RE, 0); + S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); + S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); + S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); + S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); + S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); + S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); + S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); + S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); + S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); + S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); + S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); + S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); + S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); + S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); + S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); + S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); + S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); + S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); + S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); + S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); + S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); + S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); + S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); + S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); + S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); + S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); + S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); + S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); + S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); + S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); + S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); + S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); + + movl arg_dst(%esp), %eax; + + cmpb $0, arg_xor(%esp); + jnz .L__enc_xor4; + + write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + RET; + +.L__enc_xor4: + xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + RET; +SYM_FUNC_END(__serpent_enc_blk_4way) + +SYM_FUNC_START(serpent_dec_blk_4way) + /* input: + * arg_ctx(%esp): ctx, CTX + * arg_dst(%esp): dst + * arg_src(%esp): src + */ + + pcmpeqd RNOT, RNOT; + + movl arg_ctx(%esp), CTX; + + movl arg_src(%esp), %eax; + read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + K(RA, RB, RC, RD, RE, 32); + SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); + SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); + SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); + SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); + SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); + SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); + SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); + SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); + SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); + SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); + SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); + SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); + SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); + SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); + SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); + SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); + SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); + SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); + SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); + SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); + SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); + SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); + SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); + SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); + SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); + SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); + SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); + SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); + SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); + SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); + SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); + SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); + + movl arg_dst(%esp), %eax; + write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); + + RET; +SYM_FUNC_END(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S new file mode 100644 index 000000000..e0998a011 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -0,0 +1,739 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * Based on crypto/serpent.c by + * Copyright (C) 2002 Dag Arne Osvik + * 2003 Herbert Valerio Riedel + */ + +#include + +.file "serpent-sse2-x86_64-asm_64.S" +.text + +#define CTX %rdi + +/********************************************************************** + 8-way SSE2 serpent + **********************************************************************/ +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 +#define RE1 %xmm4 + +#define RA2 %xmm5 +#define RB2 %xmm6 +#define RC2 %xmm7 +#define RD2 %xmm8 +#define RE2 %xmm9 + +#define RNOT %xmm10 + +#define RK0 %xmm11 +#define RK1 %xmm12 +#define RK2 %xmm13 +#define RK3 %xmm14 + +#define S0_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + por x0, x3; \ + pxor x4, x0; \ + pxor x2, x4; \ + pxor RNOT, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x4, x1; \ + pxor x0, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + pxor x3, x0; \ + por x0, x4; \ + pxor x2, x0; \ + pand x1, x2; \ + pxor x2, x3; \ + pxor RNOT, x1; \ + pxor x4, x2; \ + pxor x2, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x1; \ + pxor x3, x0; \ + pxor RNOT, x3; \ + pand x1, x4; \ + por x1, x0; \ + pxor x2, x3; \ + pxor x3, x0; \ + pxor x3, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + pxor x4, x3; \ + por x4, x1; \ + pxor x2, x4; \ + pand x0, x2; \ + pxor x1, x2; \ + por x0, x1; \ + pxor RNOT, x0; \ + pxor x2, x0; \ + pxor x1, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + pxor RNOT, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pand x2, x0; \ + pxor x3, x0; \ + por x4, x3; \ + pxor x1, x2; \ + pxor x1, x3; \ + pand x0, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + pand x3, x2; \ + por x1, x3; \ + pxor RNOT, x0; \ + pxor x0, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + por x2, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x3, x1; \ + por x0, x3; \ + pand x0, x4; \ + pxor x2, x0; \ + pxor x1, x2; \ + pand x3, x1; \ + pxor x3, x2; \ + por x4, x0; \ + pxor x3, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + pxor x0, x1; \ + pand x3, x0; \ + pand x4, x3; \ + pxor x2, x3; \ + por x1, x4; \ + pand x1, x2; \ + pxor x3, x4; \ + pxor x3, x0; \ + pxor x2, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x4, x0; \ + pxor x2, x3; \ + por x4, x2; \ + pxor x1, x0; \ + pxor x3, x4; \ + por x0, x2; \ + pxor x1, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + pand x0, x1; \ + pxor x4, x1; \ + pand x2, x4; \ + pxor x3, x2; \ + pxor x0, x4; \ + por x1, x3; \ + pxor RNOT, x1; \ + pxor x0, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x0, x1; \ + pxor x1, x2; \ + pxor RNOT, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + pand x4, x1; \ + por x3, x4; \ + pxor x0, x4; +#define S5_2(x0, x1, x2, x3, x4) \ + pand x3, x0; \ + pxor x3, x1; \ + pxor x2, x3; \ + pxor x1, x0; \ + pand x4, x2; \ + pxor x2, x1; \ + pand x0, x2; \ + pxor x2, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x3; \ + pxor x2, x1; \ + pxor x0, x2; \ + pand x3, x0; \ + por x3, x1; \ + pxor RNOT, x4; \ + pxor x1, x0; \ + pxor x2, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + pxor x4, x3; \ + pxor x0, x4; \ + pand x0, x2; \ + pxor x1, x4; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x3; \ + pxor x2, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + pxor RNOT, x1; \ + movdqa x1, x4; \ + pxor RNOT, x0; \ + pand x2, x1; \ + pxor x3, x1; \ + por x4, x3; \ + pxor x2, x4; \ + pxor x3, x2; \ + pxor x0, x3; \ + por x1, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + pand x0, x2; \ + pxor x4, x0; \ + pxor x3, x4; \ + pand x0, x3; \ + pxor x1, x4; \ + pxor x4, x2; \ + pxor x1, x3; \ + por x0, x4; \ + pxor x1, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pxor x0, x1; \ + por x1, x3; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + pxor x3, x2; \ + pxor x0, x3; \ + pand x1, x0; \ + pxor x2, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + pand x3, x2; \ + pxor x4, x3; \ + pxor x3, x2; \ + pxor x3, x1; \ + pand x0, x3; \ + pxor x0, x1; \ + pxor x2, x0; \ + pxor x3, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + movdqa x0, x4; \ + pxor x2, x0; \ + pxor RNOT, x2; \ + por x1, x4; \ + pxor x3, x4; \ + pand x1, x3; \ + pxor x2, x1; \ + pand x4, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + pxor x1, x4; \ + por x3, x1; \ + pxor x0, x3; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x4, x2; \ + pxor x0, x1; \ + pxor x1, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x3, x4; \ + pxor RNOT, x3; \ + por x2, x3; \ + pxor x4, x2; \ + pxor x0, x4; \ + pxor x1, x3; \ + por x2, x1; \ + pxor x0, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + pxor x4, x1; \ + por x3, x4; \ + pxor x3, x2; \ + pxor x2, x4; \ + pand x1, x2; \ + pxor x3, x2; \ + pxor x4, x3; \ + pxor x0, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x1, x4; \ + pand x2, x1; \ + pxor x0, x1; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x3, x0; \ + por x1, x3; \ + pxor x2, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + pxor x2, x0; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x1; \ + pand x2, x0; \ + pxor x3, x4; \ + pxor x0, x3; \ + pxor x1, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + pxor x3, x2; \ + movdqa x0, x4; \ + pand x1, x0; \ + pxor x2, x0; \ + por x3, x2; \ + pxor RNOT, x4; \ + pxor x0, x1; \ + pxor x2, x0; \ + pand x4, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + pxor x0, x2; \ + por x4, x0; \ + pxor x3, x0; \ + pand x2, x3; \ + pxor x3, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x1, x4; \ + pxor x3, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x2, x1; \ + pxor x4, x2; \ + pxor x3, x1; \ + pand x4, x3; \ + pxor x3, x2; \ + por x0, x3; \ + pxor RNOT, x0; \ + pxor x2, x3; \ + por x0, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + pxor x1, x4; \ + pxor x4, x2; \ + pand x0, x4; \ + pxor x1, x0; \ + pxor x3, x1; \ + pand x2, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x4, x2; \ + pxor x3, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + movdqa x0, x4; \ + pand x3, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x1, x3; \ + por x4, x2; \ + pxor x3, x2; \ + pand x0, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + pxor RNOT, x0; \ + pxor x1, x3; \ + pand x2, x1; \ + pxor x0, x4; \ + pxor x4, x3; \ + pxor x2, x4; \ + pxor x1, x0; \ + pxor x0, x2; + +#define SI7_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x2, x0; \ + por x4, x2; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + por x3, x1; \ + pxor x0, x4; \ + pand x2, x0; \ + pxor x1, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + pand x2, x1; \ + pxor x2, x3; \ + pxor x3, x4; \ + pand x3, x2; \ + por x0, x3; \ + pxor x4, x1; \ + pxor x4, x3; \ + pand x0, x4; \ + pxor x2, x4; + +#define get_key(i, j, t) \ + movd (4*(i)+(j))*4(CTX), t; \ + pshufd $0, t, t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + pxor RK0, x0 ## 1; \ + pxor RK1, x1 ## 1; \ + pxor RK2, x2 ## 1; \ + pxor RK3, x3 ## 1; \ + pxor RK0, x0 ## 2; \ + pxor RK1, x1 ## 2; \ + pxor RK2, x2 ## 2; \ + pxor RK3, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $13, x0 ## 1; \ + psrld $(32 - 13), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor x0 ## 1, x1 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + pslld $3, x2 ## 1; \ + psrld $(32 - 3), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor x2 ## 1, x1 ## 1; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $13, x0 ## 2; \ + psrld $(32 - 13), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + pxor x0 ## 2, x1 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + pslld $3, x2 ## 2; \ + psrld $(32 - 3), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor x2 ## 2, x1 ## 2; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $1, x1 ## 1; \ + psrld $(32 - 1), x4 ## 1; \ + por x4 ## 1, x1 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $3, x4 ## 1; \ + pxor x2 ## 1, x3 ## 1; \ + pxor x4 ## 1, x3 ## 1; \ + movdqa x3 ## 1, x4 ## 1; \ + get_key(i, 1, RK1); \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $1, x1 ## 2; \ + psrld $(32 - 1), x4 ## 2; \ + por x4 ## 2, x1 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $3, x4 ## 2; \ + pxor x2 ## 2, x3 ## 2; \ + pxor x4 ## 2, x3 ## 2; \ + movdqa x3 ## 2, x4 ## 2; \ + get_key(i, 3, RK3); \ + pslld $7, x3 ## 1; \ + psrld $(32 - 7), x4 ## 1; \ + por x4 ## 1, x3 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $7, x4 ## 1; \ + pxor x1 ## 1, x0 ## 1; \ + pxor x3 ## 1, x0 ## 1; \ + pxor x3 ## 1, x2 ## 1; \ + pxor x4 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + pslld $7, x3 ## 2; \ + psrld $(32 - 7), x4 ## 2; \ + por x4 ## 2, x3 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $7, x4 ## 2; \ + pxor x1 ## 2, x0 ## 2; \ + pxor x3 ## 2, x0 ## 2; \ + pxor x3 ## 2, x2 ## 2; \ + pxor x4 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + pxor RK1, x1 ## 1; \ + pxor RK3, x3 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $5, x0 ## 1; \ + psrld $(32 - 5), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + pslld $22, x2 ## 1; \ + psrld $(32 - 22), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor RK0, x0 ## 1; \ + pxor RK2, x2 ## 1; \ + pxor RK1, x1 ## 2; \ + pxor RK3, x3 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $5, x0 ## 2; \ + psrld $(32 - 5), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + pslld $22, x2 ## 2; \ + psrld $(32 - 22), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor RK0, x0 ## 2; \ + pxor RK2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + pxor RK0, x0 ## 1; \ + pxor RK2, x2 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + psrld $5, x0 ## 1; \ + pslld $(32 - 5), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor RK3, x3 ## 1; \ + pxor RK1, x1 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + psrld $22, x2 ## 1; \ + pslld $(32 - 22), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor x3 ## 1, x2 ## 1; \ + pxor RK0, x0 ## 2; \ + pxor RK2, x2 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + psrld $5, x0 ## 2; \ + pslld $(32 - 5), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + pxor RK3, x3 ## 2; \ + pxor RK1, x1 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + psrld $22, x2 ## 2; \ + pslld $(32 - 22), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor x3 ## 2, x2 ## 2; \ + pxor x3 ## 1, x0 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $7, x4 ## 1; \ + pxor x1 ## 1, x0 ## 1; \ + pxor x4 ## 1, x2 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + psrld $1, x1 ## 1; \ + pslld $(32 - 1), x4 ## 1; \ + por x4 ## 1, x1 ## 1; \ + pxor x3 ## 2, x0 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $7, x4 ## 2; \ + pxor x1 ## 2, x0 ## 2; \ + pxor x4 ## 2, x2 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + psrld $1, x1 ## 2; \ + pslld $(32 - 1), x4 ## 2; \ + por x4 ## 2, x1 ## 2; \ + movdqa x3 ## 1, x4 ## 1; \ + psrld $7, x3 ## 1; \ + pslld $(32 - 7), x4 ## 1; \ + por x4 ## 1, x3 ## 1; \ + pxor x0 ## 1, x1 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $3, x4 ## 1; \ + pxor x4 ## 1, x3 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + movdqa x3 ## 2, x4 ## 2; \ + psrld $7, x3 ## 2; \ + pslld $(32 - 7), x4 ## 2; \ + por x4 ## 2, x3 ## 2; \ + pxor x0 ## 2, x1 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $3, x4 ## 2; \ + pxor x4 ## 2, x3 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + psrld $13, x0 ## 1; \ + pslld $(32 - 13), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor x2 ## 1, x1 ## 1; \ + pxor x2 ## 1, x3 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + psrld $3, x2 ## 1; \ + pslld $(32 - 3), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + psrld $13, x0 ## 2; \ + pslld $(32 - 13), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + pxor x2 ## 2, x1 ## 2; \ + pxor x2 ## 2, x3 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + psrld $3, x2 ## 2; \ + pslld $(32 - 3), x4 ## 2; \ + por x4 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 3, RK3); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + movdqa x0, t2; \ + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + movdqu (0*4*4)(in), x0; \ + movdqu (1*4*4)(in), x1; \ + movdqu (2*4*4)(in), x2; \ + movdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu x0, (0*4*4)(out); \ + movdqu x1, (1*4*4)(out); \ + movdqu x2, (2*4*4)(out); \ + movdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu (0*4*4)(out), t0; \ + pxor t0, x0; \ + movdqu x0, (0*4*4)(out); \ + movdqu (1*4*4)(out), t0; \ + pxor t0, x1; \ + movdqu x1, (1*4*4)(out); \ + movdqu (2*4*4)(out), t0; \ + pxor t0, x2; \ + movdqu x2, (2*4*4)(out); \ + movdqu (3*4*4)(out), t0; \ + pxor t0, x3; \ + movdqu x3, (3*4*4)(out); + +SYM_FUNC_START(__serpent_enc_blk_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pcmpeqd RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + leaq (4*4*4)(%rsi), %rax; + + testb %cl, %cl; + jnz .L__enc_xor8; + + write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + RET; + +.L__enc_xor8: + xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(__serpent_enc_blk_8way) + +SYM_FUNC_START(serpent_dec_blk_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pcmpeqd RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + leaq (4*4*4)(%rsi), %rax; + write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + RET; +SYM_FUNC_END(serpent_dec_blk_8way) diff --git a/arch/x86/crypto/serpent-sse2.h b/arch/x86/crypto/serpent-sse2.h new file mode 100644 index 000000000..860ca2489 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_SERPENT_SSE2_H +#define ASM_X86_SERPENT_SSE2_H + +#include +#include + +#ifdef CONFIG_X86_32 + +#define SERPENT_PARALLEL_BLOCKS 4 + +asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) +{ + __serpent_enc_blk_4way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) +{ + serpent_dec_blk_4way(ctx, dst, src); +} + +#else + +#define SERPENT_PARALLEL_BLOCKS 8 + +asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, false); +} + +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) +{ + __serpent_enc_blk_8way(ctx, dst, src, true); +} + +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) +{ + serpent_dec_blk_8way(ctx, dst, src); +} + +#endif + +#endif diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c new file mode 100644 index 000000000..347e97f4b --- /dev/null +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for x86_64/AVX2 assembler optimized version of Serpent + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "serpent-avx.h" +#include "ecb_cbc_helpers.h" + +#define SERPENT_AVX2_PARALLEL_BLOCKS 16 + +/* 16-way AVX2 parallel cipher functions */ +asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); + +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_enc_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_dec_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_cbc_dec_16way); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); +} + +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; + +static int __init serpent_avx2_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 instructions are not detected.\n"); + return -ENODEV; + } + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +static void __exit serpent_avx2_fini(void) +{ + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +module_init(serpent_avx2_init); +module_exit(serpent_avx2_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); +MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-asm"); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 000000000..6c248e1ea --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for AVX assembler versions of Serpent Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2011-2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "serpent-avx.h" +#include "ecb_cbc_helpers.h" + +/* 8-way parallel cipher functions */ +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx); + +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx); + +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); + +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); +} + +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; + +static int __init serpent_init(void) +{ + const char *feature_name; + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +static void __exit serpent_exit(void) +{ + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +module_init(serpent_init); +module_exit(serpent_exit); + +MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c new file mode 100644 index 000000000..d78f37e9b --- /dev/null +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for SSE2 assembler versions of Serpent Cipher + * + * Copyright (c) 2011 Jussi Kivilinna + * + * Glue code based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "serpent-sse2.h" +#include "ecb_cbc_helpers.h" + +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +static void serpent_decrypt_cbc_xway(const void *ctx, u8 *dst, const u8 *src) +{ + u8 buf[SERPENT_PARALLEL_BLOCKS - 1][SERPENT_BLOCK_SIZE]; + const u8 *s = src; + + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + serpent_dec_blk_xway(ctx, dst, src); + crypto_xor(dst + SERPENT_BLOCK_SIZE, s, sizeof(buf)); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_enc_blk_xway); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_dec_blk_xway); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_decrypt_cbc_xway); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); +} + +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; + +static int __init serpent_sse2_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_XMM2)) { + printk(KERN_INFO "SSE2 instructions are not detected.\n"); + return -ENODEV; + } + + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +static void __exit serpent_sse2_exit(void) +{ + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); +} + +module_init(serpent_sse2_init); +module_exit(serpent_sse2_exit); + +MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S new file mode 100644 index 000000000..a96b2fd26 --- /dev/null +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -0,0 +1,711 @@ +/* + * Implement fast SHA-1 with AVX2 instructions. (x86_64) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Ilya Albrekht + * Maxim Locktyukhin + * Ronen Zohar + * Chandramouli Narayanan + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * SHA-1 implementation with Intel(R) AVX2 instruction set extensions. + * + *This implementation is based on the previous SSSE3 release: + *Visit http://software.intel.com/en-us/articles/ + *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ + * + *Updates 20-byte SHA-1 record at start of 'state', from 'input', for + *even number of 'blocks' consecutive 64-byte blocks. + * + *extern "C" void sha1_transform_avx2( + * struct sha1_state *state, const u8* input, int blocks ); + */ + +#include + +#define CTX %rdi /* arg1 */ +#define BUF %rsi /* arg2 */ +#define CNT %rdx /* arg3 */ + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %eax +#define REG_E %edx +#define REG_TB %ebx +#define REG_TA %r12d +#define REG_RA %rcx +#define REG_RB %rsi +#define REG_RC %rdi +#define REG_RD %rax +#define REG_RE %rdx +#define REG_RTA %r12 +#define REG_RTB %rbx +#define REG_T1 %r11d +#define xmm_mov vmovups +#define avx2_zeroupper vzeroupper +#define RND_F1 1 +#define RND_F2 2 +#define RND_F3 3 + +.macro REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set TB, REG_TB + .set TA, REG_TA + + .set RA, REG_RA + .set RB, REG_RB + .set RC, REG_RC + .set RD, REG_RD + .set RE, REG_RE + + .set RTA, REG_RTA + .set RTB, REG_RTB + + .set T1, REG_T1 +.endm + +#define HASH_PTR %r9 +#define BLOCKS_CTR %r8 +#define BUFFER_PTR %r10 +#define BUFFER_PTR2 %r13 + +#define PRECALC_BUF %r14 +#define WK_BUF %r15 + +#define W_TMP %xmm0 +#define WY_TMP %ymm0 +#define WY_TMP2 %ymm9 + +# AVX2 variables +#define WY0 %ymm3 +#define WY4 %ymm5 +#define WY08 %ymm7 +#define WY12 %ymm8 +#define WY16 %ymm12 +#define WY20 %ymm13 +#define WY24 %ymm14 +#define WY28 %ymm15 + +#define YMM_SHUFB_BSWAP %ymm10 + +/* + * Keep 2 iterations precalculated at a time: + * - 80 DWORDs per iteration * 2 + */ +#define W_SIZE (80*2*2 +16) + +#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF) +#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF) + + +.macro UPDATE_HASH hash, val + add \hash, \val + mov \val, \hash +.endm + +.macro PRECALC_RESET_WY + .set WY_00, WY0 + .set WY_04, WY4 + .set WY_08, WY08 + .set WY_12, WY12 + .set WY_16, WY16 + .set WY_20, WY20 + .set WY_24, WY24 + .set WY_28, WY28 + .set WY_32, WY_00 +.endm + +.macro PRECALC_ROTATE_WY + /* Rotate macros */ + .set WY_32, WY_28 + .set WY_28, WY_24 + .set WY_24, WY_20 + .set WY_20, WY_16 + .set WY_16, WY_12 + .set WY_12, WY_08 + .set WY_08, WY_04 + .set WY_04, WY_00 + .set WY_00, WY_32 + + /* Define register aliases */ + .set WY, WY_00 + .set WY_minus_04, WY_04 + .set WY_minus_08, WY_08 + .set WY_minus_12, WY_12 + .set WY_minus_16, WY_16 + .set WY_minus_20, WY_20 + .set WY_minus_24, WY_24 + .set WY_minus_28, WY_28 + .set WY_minus_32, WY +.endm + +.macro PRECALC_00_15 + .if (i == 0) # Initialize and rotate registers + PRECALC_RESET_WY + PRECALC_ROTATE_WY + .endif + + /* message scheduling pre-compute for rounds 0-15 */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vmovdqu (i * 2)(BUFFER_PTR), W_TMP + .elseif ((i & 7) == 1) + vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\ + WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY + .elseif ((i & 7) == 4) + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP + .elseif ((i & 7) == 7) + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_16_31 + /* + * message scheduling pre-compute for rounds 16-31 + * calculating last 32 w[i] values in 8 XMM registers + * pre-calculate K+w[i] values and store to mem + * for later load by ALU add instruction + * + * "brute force" vectorization for rounds 16-31 only + * due to w[i]->w[i-3] dependency + */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + /* w[i-14] */ + vpalignr $8, WY_minus_16, WY_minus_12, WY + vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */ + .elseif ((i & 7) == 1) + vpxor WY_minus_08, WY, WY + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpxor WY_TMP, WY, WY + vpslldq $12, WY, WY_TMP2 + .elseif ((i & 7) == 3) + vpslld $1, WY, WY_TMP + vpsrld $31, WY, WY + .elseif ((i & 7) == 4) + vpor WY, WY_TMP, WY_TMP + vpslld $2, WY_TMP2, WY + .elseif ((i & 7) == 5) + vpsrld $30, WY_TMP2, WY_TMP2 + vpxor WY, WY_TMP, WY_TMP + .elseif ((i & 7) == 7) + vpxor WY_TMP2, WY_TMP, WY + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_32_79 + /* + * in SHA-1 specification: + * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 + * instead we do equal: + * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 + * allows more efficient vectorization + * since w[i]=>w[i-3] dependency is broken + */ + + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP + .elseif ((i & 7) == 1) + /* W is W_minus_32 before xor */ + vpxor WY_minus_28, WY, WY + .elseif ((i & 7) == 2) + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 3) + vpxor WY_TMP, WY, WY + .elseif ((i & 7) == 4) + vpslld $2, WY, WY_TMP + .elseif ((i & 7) == 5) + vpsrld $30, WY, WY + vpor WY, WY_TMP, WY + .elseif ((i & 7) == 7) + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC r, s + .set i, \r + + .if (i < 40) + .set K_XMM, 32*0 + .elseif (i < 80) + .set K_XMM, 32*1 + .elseif (i < 120) + .set K_XMM, 32*2 + .else + .set K_XMM, 32*3 + .endif + + .if (i<32) + PRECALC_00_15 \s + .elseif (i<64) + PRECALC_16_31 \s + .elseif (i < 160) + PRECALC_32_79 \s + .endif +.endm + +.macro ROTATE_STATE + .set T_REG, E + .set E, D + .set D, C + .set C, B + .set B, TB + .set TB, A + .set A, T_REG + + .set T_REG, RE + .set RE, RD + .set RD, RC + .set RC, RB + .set RB, RTB + .set RTB, RA + .set RA, T_REG +.endm + +/* Macro relies on saved ROUND_Fx */ + +.macro RND_FUN f, r + .if (\f == RND_F1) + ROUND_F1 \r + .elseif (\f == RND_F2) + ROUND_F2 \r + .elseif (\f == RND_F3) + ROUND_F3 \r + .endif +.endm + +.macro RR r + .set round_id, (\r % 80) + + .if (round_id == 0) /* Precalculate F for first round */ + .set ROUND_FUNC, RND_F1 + mov B, TB + + rorx $(32-30), B, B /* b>>>2 */ + andn D, TB, T1 + and C, TB + xor T1, TB + .endif + + RND_FUN ROUND_FUNC, \r + ROTATE_STATE + + .if (round_id == 18) + .set ROUND_FUNC, RND_F2 + .elseif (round_id == 38) + .set ROUND_FUNC, RND_F3 + .elseif (round_id == 58) + .set ROUND_FUNC, RND_F2 + .endif + + .set round_id, ( (\r+1) % 80) + + RND_FUN ROUND_FUNC, (\r+1) + ROTATE_STATE +.endm + +.macro ROUND_F1 r + add WK(\r), E + + andn C, A, T1 /* ~b&d */ + lea (RE,RTB), E /* Add F from the previous round */ + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30),A, TB /* b>>>2 for next round */ + + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + /* + * Calculate F for the next round + * (b & c) ^ andn[b, d] + */ + and B, A /* b&c */ + xor T1, A /* F1 = (b&c) ^ (~b&d) */ + + lea (RE,RTA), E /* E += A >>> 5 */ +.endm + +.macro ROUND_F2 r + add WK(\r), E + lea (RE,RTB), E /* Add F from the previous round */ + + /* Calculate F for the next round */ + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + .if ((round_id) < 79) + rorx $(32-30), A, TB /* b>>>2 for next round */ + .endif + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + .if ((round_id) < 79) + xor B, A + .endif + + add TA, E /* E += A >>> 5 */ + + .if ((round_id) < 79) + xor C, A + .endif +.endm + +.macro ROUND_F3 r + add WK(\r), E + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + lea (RE,RTB), E /* Add F from the previous round */ + + mov B, T1 + or A, T1 + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30), A, TB /* b>>>2 for next round */ + + /* Calculate F for the next round + * (b and c) or (d and (b or c)) + */ + and C, T1 + and B, A + or T1, A + + add TA, E /* E += A >>> 5 */ + +.endm + +/* Add constant only if (%2 > %3) condition met (uses RTA as temp) + * %1 + %2 >= %3 ? %4 : 0 + */ +.macro ADD_IF_GE a, b, c, d + mov \a, RTA + add $\d, RTA + cmp $\c, \b + cmovge RTA, \a +.endm + +/* + * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining + */ +.macro SHA1_PIPELINED_MAIN_BODY + + REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + mov %rsp, PRECALC_BUF + lea (2*4*80+32)(%rsp), WK_BUF + + # Precalc WK for first 2 blocks + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64 + .set i, 0 + .rept 160 + PRECALC i + .set i, i + 1 + .endr + + /* Go to next block if needed */ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 + xchg WK_BUF, PRECALC_BUF + + .align 32 +_loop: + /* + * code loops through more than one block + * we use K_BASE value as a signal of a last block, + * it is set below by: cmovae BUFFER_PTR, K_BASE + */ + test BLOCKS_CTR, BLOCKS_CTR + jnz _begin + .align 32 + jmp _end + .align 32 +_begin: + + /* + * Do first block + * rounds: 0,2,4,6,8 + */ + .set j, 0 + .rept 5 + RR j + .set j, j+2 + .endr + + jmp _loop0 +_loop0: + + /* + * rounds: + * 10,12,14,16,18 + * 20,22,24,26,28 + * 30,32,34,36,38 + * 40,42,44,46,48 + * 50,52,54,56,58 + */ + .rept 25 + RR j + .set j, j+2 + .endr + + /* Update Counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128 + /* + * rounds + * 60,62,64,66,68 + * 70,72,74,76,78 + */ + .rept 10 + RR j + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + test BLOCKS_CTR, BLOCKS_CTR + jz _loop + + mov TB, B + + /* Process second block */ + /* + * rounds + * 0+80, 2+80, 4+80, 6+80, 8+80 + * 10+80,12+80,14+80,16+80,18+80 + */ + + .set j, 0 + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop1 +_loop1: + /* + * rounds + * 20+80,22+80,24+80,26+80,28+80 + * 30+80,32+80,34+80,36+80,38+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop2 +_loop2: + + /* + * rounds + * 40+80,42+80,44+80,46+80,48+80 + * 50+80,52+80,54+80,56+80,58+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + /* update counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 + + jmp _loop3 +_loop3: + + /* + * rounds + * 60+80,62+80,64+80,66+80,68+80 + * 70+80,72+80,74+80,76+80,78+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + /* Reset state for AVX2 reg permutation */ + mov A, TA + mov TB, A + mov C, TB + mov E, C + mov D, B + mov TA, D + + REGALLOC + + xchg WK_BUF, PRECALC_BUF + + jmp _loop + + .align 32 + _end: + +.endm +/* + * macro implements SHA-1 function's body for several 64-byte blocks + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + SYM_FUNC_START(\name) + + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + RESERVE_STACK = (W_SIZE*4 + 8+24) + + /* Align stack */ + push %rbp + mov %rsp, %rbp + and $~(0x20-1), %rsp + sub $RESERVE_STACK, %rsp + + avx2_zeroupper + + /* Setup initial values */ + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + + mov BUF, BUFFER_PTR2 + mov CNT, BLOCKS_CTR + + xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + avx2_zeroupper + + mov %rbp, %rsp + pop %rbp + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + + RET + + SYM_FUNC_END(\name) +.endm + +.section .rodata + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.align 128 +K_XMM_AR: + .long K1, K1, K1, K1 + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f +.text + +SHA1_VECTOR_ASM sha1_transform_avx2 diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S new file mode 100644 index 000000000..3cae5a1bb --- /dev/null +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -0,0 +1,305 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +/* gcc conversion */ +#define FRAME_SIZE 32 /* space for 2x16 bytes */ + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha1_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks) + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ +.text +.align 32 +SYM_TYPED_FUNC_START(sha1_ni_transform) + push %rbp + mov %rsp, %rbp + sub $FRAME_SIZE, %rsp + and $~0xF, %rsp + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* load initial hash values */ + pinsrd $3, 1*16(DIGEST_PTR), E0 + movdqu 0*16(DIGEST_PTR), ABCD + pand UPPER_WORD_MASK(%rip), E0 + pshufd $0x1B, ABCD, ABCD + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa E0, (0*16)(%rsp) + movdqa ABCD, (1*16)(%rsp) + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + pxor MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte (0*16)(%rsp), E0 + paddd (1*16)(%rsp), ABCD + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, ABCD, ABCD + movdqu ABCD, 0*16(DIGEST_PTR) + pextrd $3, E0, 1*16(DIGEST_PTR) + +.Ldone_hash: + mov %rbp, %rsp + pop %rbp + + RET +SYM_FUNC_END(sha1_ni_transform) + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 +UPPER_WORD_MASK: + .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S new file mode 100644 index 000000000..f54988c80 --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -0,0 +1,554 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental + * SSE3 instruction set extensions introduced in Intel Core Microarchitecture + * processors. CPUs supporting Intel(R) AVX extensions will get an additional + * boost. + * + * This work was inspired by the vectorized implementation of Dean Gaudet. + * Additional information on it can be found at: + * http://www.arctic.org/~dean/crypto/sha1.html + * + * It was improved upon with more efficient vectorization of the message + * scheduling. This implementation has also been optimized for all current and + * several future generations of Intel CPUs. + * + * See this article for more information about the implementation details: + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ + * + * Copyright (C) 2010, Intel Corp. + * Authors: Maxim Locktyukhin + * Ronen Zohar + * + * Converted to AT&T syntax and adapted for inclusion in the Linux kernel: + * Author: Mathias Krause + */ + +#include +#include + +#define CTX %rdi // arg1 +#define BUF %rsi // arg2 +#define CNT %rdx // arg3 + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %r12d +#define REG_E %edx + +#define REG_T1 %eax +#define REG_T2 %ebx + +#define K_BASE %r8 +#define HASH_PTR %r9 +#define BUFFER_PTR %r10 +#define BUFFER_END %r11 + +#define W_TMP1 %xmm0 +#define W_TMP2 %xmm9 + +#define W0 %xmm1 +#define W4 %xmm2 +#define W8 %xmm3 +#define W12 %xmm4 +#define W16 %xmm5 +#define W20 %xmm6 +#define W24 %xmm7 +#define W28 %xmm8 + +#define XMM_SHUFB_BSWAP %xmm10 + +/* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */ +#define WK(t) (((t) & 15) * 4)(%rsp) +#define W_PRECALC_AHEAD 16 + +/* + * This macro implements the SHA-1 function's body for single 64-byte block + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + SYM_TYPED_FUNC_START(\name) + + push %rbx + push %r12 + push %rbp + mov %rsp, %rbp + + sub $64, %rsp # allocate workspace + and $~15, %rsp # align stack + + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + + shl $6, CNT # multiply by 64 + add BUF, CNT + mov CNT, BUFFER_END + + lea K_XMM_AR(%rip), K_BASE + xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + # cleanup workspace + mov $8, %ecx + mov %rsp, %rdi + xor %eax, %eax + rep stosq + + mov %rbp, %rsp # deallocate workspace + pop %rbp + pop %r12 + pop %rbx + RET + + SYM_FUNC_END(\name) +.endm + +/* + * This macro implements 80 rounds of SHA-1 for one 64-byte block + */ +.macro SHA1_PIPELINED_MAIN_BODY + INIT_REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + .set i, 0 + .rept W_PRECALC_AHEAD + W_PRECALC i + .set i, (i+1) + .endr + +.align 4 +1: + RR F1,A,B,C,D,E,0 + RR F1,D,E,A,B,C,2 + RR F1,B,C,D,E,A,4 + RR F1,E,A,B,C,D,6 + RR F1,C,D,E,A,B,8 + + RR F1,A,B,C,D,E,10 + RR F1,D,E,A,B,C,12 + RR F1,B,C,D,E,A,14 + RR F1,E,A,B,C,D,16 + RR F1,C,D,E,A,B,18 + + RR F2,A,B,C,D,E,20 + RR F2,D,E,A,B,C,22 + RR F2,B,C,D,E,A,24 + RR F2,E,A,B,C,D,26 + RR F2,C,D,E,A,B,28 + + RR F2,A,B,C,D,E,30 + RR F2,D,E,A,B,C,32 + RR F2,B,C,D,E,A,34 + RR F2,E,A,B,C,D,36 + RR F2,C,D,E,A,B,38 + + RR F3,A,B,C,D,E,40 + RR F3,D,E,A,B,C,42 + RR F3,B,C,D,E,A,44 + RR F3,E,A,B,C,D,46 + RR F3,C,D,E,A,B,48 + + RR F3,A,B,C,D,E,50 + RR F3,D,E,A,B,C,52 + RR F3,B,C,D,E,A,54 + RR F3,E,A,B,C,D,56 + RR F3,C,D,E,A,B,58 + + add $64, BUFFER_PTR # move to the next 64-byte block + cmp BUFFER_END, BUFFER_PTR # if the current is the last one use + cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun + + RR F4,A,B,C,D,E,60 + RR F4,D,E,A,B,C,62 + RR F4,B,C,D,E,A,64 + RR F4,E,A,B,C,D,66 + RR F4,C,D,E,A,B,68 + + RR F4,A,B,C,D,E,70 + RR F4,D,E,A,B,C,72 + RR F4,B,C,D,E,A,74 + RR F4,E,A,B,C,D,76 + RR F4,C,D,E,A,B,78 + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), B + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + RESTORE_RENAMED_REGS + cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end + jne 1b +.endm + +.macro INIT_REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set T1, REG_T1 + .set T2, REG_T2 +.endm + +.macro RESTORE_RENAMED_REGS + # order is important (REG_C is where it should be) + mov B, REG_B + mov D, REG_D + mov A, REG_A + mov E, REG_E +.endm + +.macro SWAP_REG_NAMES a, b + .set _T, \a + .set \a, \b + .set \b, _T +.endm + +.macro F1 b, c, d + mov \c, T1 + SWAP_REG_NAMES \c, T1 + xor \d, T1 + and \b, T1 + xor \d, T1 +.endm + +.macro F2 b, c, d + mov \d, T1 + SWAP_REG_NAMES \d, T1 + xor \c, T1 + xor \b, T1 +.endm + +.macro F3 b, c ,d + mov \c, T1 + SWAP_REG_NAMES \c, T1 + mov \b, T2 + or \b, T1 + and \c, T2 + and \d, T1 + or T2, T1 +.endm + +.macro F4 b, c, d + F2 \b, \c, \d +.endm + +.macro UPDATE_HASH hash, val + add \hash, \val + mov \val, \hash +.endm + +/* + * RR does two rounds of SHA-1 back to back with W[] pre-calc + * t1 = F(b, c, d); e += w(i) + * e += t1; b <<= 30; d += w(i+1); + * t1 = F(a, b, c); + * d += t1; a <<= 5; + * e += a; + * t1 = e; a >>= 7; + * t1 <<= 5; + * d += t1; + */ +.macro RR F, a, b, c, d, e, round + add WK(\round), \e + \F \b, \c, \d # t1 = F(b, c, d); + W_PRECALC (\round + W_PRECALC_AHEAD) + rol $30, \b + add T1, \e + add WK(\round + 1), \d + + \F \a, \b, \c + W_PRECALC (\round + W_PRECALC_AHEAD + 1) + rol $5, \a + add \a, \e + add T1, \d + ror $7, \a # (a <>r 7) => a <= 80) && (i < (80 + W_PRECALC_AHEAD)))) + .set i, ((\r) % 80) # pre-compute for the next iteration + .if (i == 0) + W_PRECALC_RESET + .endif + W_PRECALC_00_15 + .elseif (i<32) + W_PRECALC_16_31 + .elseif (i < 80) // rounds 32-79 + W_PRECALC_32_79 + .endif +.endm + +.macro W_PRECALC_RESET + .set W, W0 + .set W_minus_04, W4 + .set W_minus_08, W8 + .set W_minus_12, W12 + .set W_minus_16, W16 + .set W_minus_20, W20 + .set W_minus_24, W24 + .set W_minus_28, W28 + .set W_minus_32, W +.endm + +.macro W_PRECALC_ROTATE + .set W_minus_32, W_minus_28 + .set W_minus_28, W_minus_24 + .set W_minus_24, W_minus_20 + .set W_minus_20, W_minus_16 + .set W_minus_16, W_minus_12 + .set W_minus_12, W_minus_08 + .set W_minus_08, W_minus_04 + .set W_minus_04, W + .set W, W_minus_32 +.endm + +.macro W_PRECALC_SSSE3 + +.macro W_PRECALC_00_15 + W_PRECALC_00_15_SSSE3 +.endm +.macro W_PRECALC_16_31 + W_PRECALC_16_31_SSSE3 +.endm +.macro W_PRECALC_32_79 + W_PRECALC_32_79_SSSE3 +.endm + +/* message scheduling pre-compute for rounds 0-15 */ +.macro W_PRECALC_00_15_SSSE3 + .if ((i & 3) == 0) + movdqu (i*4)(BUFFER_PTR), W_TMP1 + .elseif ((i & 3) == 1) + pshufb XMM_SHUFB_BSWAP, W_TMP1 + movdqa W_TMP1, W + .elseif ((i & 3) == 2) + paddd (K_BASE), W_TMP1 + .elseif ((i & 3) == 3) + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +/* message scheduling pre-compute for rounds 16-31 + * + * - calculating last 32 w[i] values in 8 XMM registers + * - pre-calculate K+w[i] values and store to mem, for later load by ALU add + * instruction + * + * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3] + * dependency, but improves for 32-79 + */ +.macro W_PRECALC_16_31_SSSE3 + # blended scheduling of vector and scalar instruction streams, one 4-wide + # vector iteration / 4 scalar rounds + .if ((i & 3) == 0) + movdqa W_minus_12, W + palignr $8, W_minus_16, W # w[i-14] + movdqa W_minus_04, W_TMP1 + psrldq $4, W_TMP1 # w[i-3] + pxor W_minus_08, W + .elseif ((i & 3) == 1) + pxor W_minus_16, W_TMP1 + pxor W_TMP1, W + movdqa W, W_TMP2 + movdqa W, W_TMP1 + pslldq $12, W_TMP2 + .elseif ((i & 3) == 2) + psrld $31, W + pslld $1, W_TMP1 + por W, W_TMP1 + movdqa W_TMP2, W + psrld $30, W_TMP2 + pslld $2, W + .elseif ((i & 3) == 3) + pxor W, W_TMP1 + pxor W_TMP2, W_TMP1 + movdqa W_TMP1, W + paddd K_XMM(K_BASE), W_TMP1 + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +/* message scheduling pre-compute for rounds 32-79 + * + * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 + * instead we do equal: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 + * allows more efficient vectorization since w[i]=>w[i-3] dependency is broken + */ +.macro W_PRECALC_32_79_SSSE3 + .if ((i & 3) == 0) + movdqa W_minus_04, W_TMP1 + pxor W_minus_28, W # W is W_minus_32 before xor + palignr $8, W_minus_08, W_TMP1 + .elseif ((i & 3) == 1) + pxor W_minus_16, W + pxor W_TMP1, W + movdqa W, W_TMP1 + .elseif ((i & 3) == 2) + psrld $30, W + pslld $2, W_TMP1 + por W, W_TMP1 + .elseif ((i & 3) == 3) + movdqa W_TMP1, W + paddd K_XMM(K_BASE), W_TMP1 + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_SSSE3 + + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.section .rodata +.align 16 + +K_XMM_AR: + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + + +.section .text + +W_PRECALC_SSSE3 +.macro xmm_mov a, b + movdqu \a,\b +.endm + +/* + * SSSE3 optimized implementation: + * + * extern "C" void sha1_transform_ssse3(struct sha1_state *state, + * const u8 *data, int blocks); + * + * Note that struct sha1_state is assumed to begin with u32 state[5]. + */ +SHA1_VECTOR_ASM sha1_transform_ssse3 + +.macro W_PRECALC_AVX + +.purgem W_PRECALC_00_15 +.macro W_PRECALC_00_15 + W_PRECALC_00_15_AVX +.endm +.purgem W_PRECALC_16_31 +.macro W_PRECALC_16_31 + W_PRECALC_16_31_AVX +.endm +.purgem W_PRECALC_32_79 +.macro W_PRECALC_32_79 + W_PRECALC_32_79_AVX +.endm + +.macro W_PRECALC_00_15_AVX + .if ((i & 3) == 0) + vmovdqu (i*4)(BUFFER_PTR), W_TMP1 + .elseif ((i & 3) == 1) + vpshufb XMM_SHUFB_BSWAP, W_TMP1, W + .elseif ((i & 3) == 2) + vpaddd (K_BASE), W, W_TMP1 + .elseif ((i & 3) == 3) + vmovdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_16_31_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_16, W_minus_12, W # w[i-14] + vpsrldq $4, W_minus_04, W_TMP1 # w[i-3] + vpxor W_minus_08, W, W + vpxor W_minus_16, W_TMP1, W_TMP1 + .elseif ((i & 3) == 1) + vpxor W_TMP1, W, W + vpslldq $12, W, W_TMP2 + vpslld $1, W, W_TMP1 + .elseif ((i & 3) == 2) + vpsrld $31, W, W + vpor W, W_TMP1, W_TMP1 + vpslld $2, W_TMP2, W + vpsrld $30, W_TMP2, W_TMP2 + .elseif ((i & 3) == 3) + vpxor W, W_TMP1, W_TMP1 + vpxor W_TMP2, W_TMP1, W + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_32_79_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_08, W_minus_04, W_TMP1 + vpxor W_minus_28, W, W # W is W_minus_32 before xor + .elseif ((i & 3) == 1) + vpxor W_minus_16, W_TMP1, W_TMP1 + vpxor W_TMP1, W, W + .elseif ((i & 3) == 2) + vpslld $2, W, W_TMP1 + vpsrld $30, W, W + vpor W, W_TMP1, W + .elseif ((i & 3) == 3) + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_AVX + +W_PRECALC_AVX +.purgem xmm_mov +.macro xmm_mov a, b + vmovdqu \a,\b +.endm + + +/* AVX optimized implementation: + * extern "C" void sha1_transform_avx(struct sha1_state *state, + * const u8 *data, int blocks); + */ +SHA1_VECTOR_ASM sha1_transform_avx diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c new file mode 100644 index 000000000..959afa705 --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cryptographic API. + * + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using + * Supplemental SSE3 instructions. + * + * This file is based on sha1_generic.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * Copyright (c) Chandramouli Narayanan + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha1_block_fn *sha1_xform) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) + return crypto_sha1_update(desc, data, len); + + /* + * Make sure struct sha1_state begins directly with the SHA1 + * 160-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); + + kernel_fpu_begin(); + sha1_base_do_update(desc, data, len, sha1_xform); + kernel_fpu_end(); + + return 0; +} + +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha1_block_fn *sha1_xform) +{ + if (!crypto_simd_usable()) + return crypto_sha1_finup(desc, data, len, out); + + kernel_fpu_begin(); + if (len) + sha1_base_do_update(desc, data, len, sha1_xform); + sha1_base_do_finalize(desc, sha1_xform); + kernel_fpu_end(); + + return sha1_base_finish(desc, out); +} + +asmlinkage void sha1_transform_ssse3(struct sha1_state *state, + const u8 *data, int blocks); + +static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, sha1_transform_ssse3); +} + +static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, sha1_transform_ssse3); +} + +/* Add padding and return the message digest. */ +static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ssse3_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_ssse3_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_ssse3_update, + .final = sha1_ssse3_final, + .finup = sha1_ssse3_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ssse3", + .cra_priority = 150, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int register_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shash(&sha1_ssse3_alg); + return 0; +} + +static void unregister_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shash(&sha1_ssse3_alg); +} + +asmlinkage void sha1_transform_avx(struct sha1_state *state, + const u8 *data, int blocks); + +static int sha1_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, sha1_transform_avx); +} + +static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, sha1_transform_avx); +} + +static int sha1_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx_update, + .final = sha1_avx_final, + .finup = sha1_avx_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx", + .cra_priority = 160, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool avx_usable(void) +{ + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + if (boot_cpu_has(X86_FEATURE_AVX)) + pr_info("AVX detected but unusable.\n"); + return false; + } + + return true; +} + +static int register_sha1_avx(void) +{ + if (avx_usable()) + return crypto_register_shash(&sha1_avx_alg); + return 0; +} + +static void unregister_sha1_avx(void) +{ + if (avx_usable()) + crypto_unregister_shash(&sha1_avx_alg); +} + +#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ + +asmlinkage void sha1_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks); + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + && boot_cpu_has(X86_FEATURE_BMI1) + && boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static void sha1_apply_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks) +{ + /* Select the optimal transform based on data block size */ + if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(state, data, blocks); + else + sha1_transform_avx(state, data, blocks); +} + +static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, sha1_apply_transform_avx2); +} + +static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); +} + +static int sha1_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx2_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx2_update, + .final = sha1_avx2_final, + .finup = sha1_avx2_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx2", + .cra_priority = 170, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int register_sha1_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shash(&sha1_avx2_alg); + return 0; +} + +static void unregister_sha1_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shash(&sha1_avx2_alg); +} + +#ifdef CONFIG_AS_SHA1_NI +asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, + int rounds); + +static int sha1_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, sha1_ni_transform); +} + +static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, sha1_ni_transform); +} + +static int sha1_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_ni_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_ni_update, + .final = sha1_ni_final, + .finup = sha1_ni_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ni", + .cra_priority = 250, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int register_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shash(&sha1_ni_alg); + return 0; +} + +static void unregister_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shash(&sha1_ni_alg); +} + +#else +static inline int register_sha1_ni(void) { return 0; } +static inline void unregister_sha1_ni(void) { } +#endif + +static int __init sha1_ssse3_mod_init(void) +{ + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; + + if (register_sha1_ssse3()) + goto fail; + + if (register_sha1_avx()) { + unregister_sha1_ssse3(); + goto fail; + } + + if (register_sha1_avx2()) { + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; + } + + if (register_sha1_ni()) { + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; + } + + return 0; +fail: + return -ENODEV; +} + +static void __exit sha1_ssse3_mod_fini(void) +{ + unregister_sha1_ni(); + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); +} + +module_init(sha1_ssse3_mod_init); +module_exit(sha1_ssse3_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); + +MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-ssse3"); +MODULE_ALIAS_CRYPTO("sha1-avx"); +MODULE_ALIAS_CRYPTO("sha1-avx2"); +#ifdef CONFIG_AS_SHA1_NI +MODULE_ALIAS_CRYPTO("sha1-ni"); +#endif diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S new file mode 100644 index 000000000..06ea30c20 --- /dev/null +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -0,0 +1,500 @@ +######################################################################## +# Implement fast SHA-256 with AVX1 instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-256 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## +# This code schedules 1 block at a time, with 4 lanes per block +######################################################################## + +#include +#include + +## assume buffers not aligned +#define VMOVDQ vmovdqu + +################################ Define Macros + +# addm [mem], reg +# Add reg to mem using reg-mem add and store +.macro addm p1 p2 + add \p1, \p2 + mov \p2, \p1 +.endm + + +.macro MY_ROR p1 p2 + shld $(32-(\p1)), \p2, \p2 +.endm + +################################ + +# COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask +# Load xmm with mem and byte swap each dword +.macro COPY_XMM_AND_BSWAP p1 p2 p3 + VMOVDQ \p2, \p1 + vpshufb \p3, \p1, \p1 +.endm + +################################ + +X0 = %xmm4 +X1 = %xmm5 +X2 = %xmm6 +X3 = %xmm7 + +XTMP0 = %xmm0 +XTMP1 = %xmm1 +XTMP2 = %xmm2 +XTMP3 = %xmm3 +XTMP4 = %xmm8 +XFER = %xmm9 +XTMP5 = %xmm11 + +SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA +SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00 +BYTE_FLIP_MASK = %xmm13 + +NUM_BLKS = %rdx # 3rd arg +INP = %rsi # 2nd arg +CTX = %rdi # 1st arg + +SRND = %rsi # clobbers INP +c = %ecx +d = %r8d +e = %edx +TBL = %r12 +a = %eax +b = %ebx + +f = %r9d +g = %r10d +h = %r11d + +y0 = %r13d +y1 = %r14d +y2 = %r15d + + +_INP_END_SIZE = 8 +_INP_SIZE = 8 +_XFER_SIZE = 16 +_XMM_SAVE_SIZE = 0 + +_INP_END = 0 +_INP = _INP_END + _INP_END_SIZE +_XFER = _INP + _INP_SIZE +_XMM_SAVE = _XFER + _XFER_SIZE +STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE + +# rotate_Xs +# Rotate values of symbols X0...X3 +.macro rotate_Xs +X_ = X0 +X0 = X1 +X1 = X2 +X2 = X3 +X3 = X_ +.endm + +# ROTATE_ARGS +# Rotate values of symbols a...h +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED + ## compute s0 four at a time and s1 two at a time + ## compute W[-16] + W[-7] 4 at a time + + mov e, y0 # y0 = e + MY_ROR (25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7] + MY_ROR (22-13), y1 # y1 = a >> (22-13) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor a, y1 # y1 = a ^ (a >> (22-13) + xor g, y2 # y2 = f^g + vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16] + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + ## compute s0 + vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15] + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor g, y2 # y2 = CH = ((f^g)&e)^g + MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y0, y2 # y2 = S1 + CH + add _XFER(%rsp), y2 # y2 = k + w + S1 + CH + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + vpsrld $7, XTMP1, XTMP2 + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + vpslld $(32-7), XTMP1, XTMP3 + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + vpor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS + mov e, y0 # y0 = e + mov a, y1 # y1 = a + MY_ROR (25-11), y0 # y0 = e >> (25-11) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + MY_ROR (22-13), y1 # y1 = a >> (22-13) + vpsrld $18, XTMP1, XTMP2 # + xor a, y1 # y1 = a ^ (a >> (22-13) + MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor g, y2 # y2 = f^g + vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3 + MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + vpslld $(32-18), XTMP1, XTMP1 + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor g, y2 # y2 = CH = ((f^g)&e)^g + vpxor XTMP1, XTMP3, XTMP3 # + add y0, y2 # y2 = S1 + CH + add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + vpxor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0 + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + ## compute low s1 + vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS + mov e, y0 # y0 = e + mov a, y1 # y1 = a + MY_ROR (25-11), y0 # y0 = e >> (25-11) + xor e, y0 # y0 = e ^ (e >> (25-11)) + MY_ROR (22-13), y1 # y1 = a >> (22-13) + mov f, y2 # y2 = f + xor a, y1 # y1 = a ^ (a >> (22-13) + MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA} + xor g, y2 # y2 = f^g + vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA} + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA} + MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor g, y2 # y2 = CH = ((f^g)&e)^g + MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + vpxor XTMP3, XTMP2, XTMP2 # + add y0, y2 # y2 = S1 + CH + MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA} + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA} + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + ## compute high s1 + vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC} + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS + mov e, y0 # y0 = e + MY_ROR (25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + MY_ROR (22-13), y1 # y1 = a >> (22-13) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC} + xor a, y1 # y1 = a ^ (a >> (22-13) + xor g, y2 # y2 = f^g + vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC} + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC} + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor g, y2 # y2 = CH = ((f^g)&e)^g + vpxor XTMP3, XTMP2, XTMP2 + MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y0, y2 # y2 = S1 + CH + add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC} + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00} + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS + rotate_Xs +.endm + +## input is [rsp + _XFER + %1 * 4] +.macro DO_ROUND round + mov e, y0 # y0 = e + MY_ROR (25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + xor e, y0 # y0 = e ^ (e >> (25-11)) + MY_ROR (22-13), y1 # y1 = a >> (22-13) + mov f, y2 # y2 = f + xor a, y1 # y1 = a ^ (a >> (22-13) + MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor g, y2 # y2 = f^g + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + and e, y2 # y2 = (f^g)&e + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor g, y2 # y2 = CH = ((f^g)&e)^g + add y0, y2 # y2 = S1 + CH + MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + offset = \round * 4 + _XFER # + add offset(%rsp), y2 # y2 = k + w + S1 + CH + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS +.endm + +######################################################################## +## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state +## arg 2 : pointer to input data +## arg 3 : Num blocks +######################################################################## +.text +SYM_TYPED_FUNC_START(sha256_transform_avx) +.align 32 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsp, %rbp + + subq $STACK_SIZE, %rsp # allocate stack space + and $~15, %rsp # align stack pointer + + shl $6, NUM_BLKS # convert to bytes + jz done_hash + add INP, NUM_BLKS # pointer to end of data + mov NUM_BLKS, _INP_END(%rsp) + + ## load initial digest + mov 4*0(CTX), a + mov 4*1(CTX), b + mov 4*2(CTX), c + mov 4*3(CTX), d + mov 4*4(CTX), e + mov 4*5(CTX), f + mov 4*6(CTX), g + mov 4*7(CTX), h + + vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK + vmovdqa _SHUF_00BA(%rip), SHUF_00BA + vmovdqa _SHUF_DC00(%rip), SHUF_DC00 +loop0: + lea K256(%rip), TBL + + ## byte swap first 16 dwords + COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK + + mov INP, _INP(%rsp) + + ## schedule 48 input dwords, by doing 3 rounds of 16 each + mov $3, SRND +.align 16 +loop1: + vpaddd (TBL), X0, XFER + vmovdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddd 1*16(TBL), X0, XFER + vmovdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddd 2*16(TBL), X0, XFER + vmovdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddd 3*16(TBL), X0, XFER + vmovdqa XFER, _XFER(%rsp) + add $4*16, TBL + FOUR_ROUNDS_AND_SCHED + + sub $1, SRND + jne loop1 + + mov $2, SRND +loop2: + vpaddd (TBL), X0, XFER + vmovdqa XFER, _XFER(%rsp) + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + + vpaddd 1*16(TBL), X1, XFER + vmovdqa XFER, _XFER(%rsp) + add $2*16, TBL + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + + vmovdqa X2, X0 + vmovdqa X3, X1 + + sub $1, SRND + jne loop2 + + addm (4*0)(CTX),a + addm (4*1)(CTX),b + addm (4*2)(CTX),c + addm (4*3)(CTX),d + addm (4*4)(CTX),e + addm (4*5)(CTX),f + addm (4*6)(CTX),g + addm (4*7)(CTX),h + + mov _INP(%rsp), INP + add $64, INP + cmp _INP_END(%rsp), INP + jne loop0 + +done_hash: + + mov %rbp, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + RET +SYM_FUNC_END(sha256_transform_avx) + +.section .rodata.cst256.K256, "aM", @progbits, 256 +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 + +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 +# shuffle xBxA -> 00BA +_SHUF_00BA: + .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 + +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 +# shuffle xDxC -> DC00 +_SHUF_DC00: + .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S new file mode 100644 index 000000000..2d2be531a --- /dev/null +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -0,0 +1,768 @@ +######################################################################## +# Implement fast SHA-256 with AVX2 instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-256 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## +# This code schedules 2 blocks at a time, with 4 lanes per block +######################################################################## + +#include +#include + +## assume buffers not aligned +#define VMOVDQ vmovdqu + +################################ Define Macros + +# addm [mem], reg +# Add reg to mem using reg-mem add and store +.macro addm p1 p2 + add \p1, \p2 + mov \p2, \p1 +.endm + +################################ + +X0 = %ymm4 +X1 = %ymm5 +X2 = %ymm6 +X3 = %ymm7 + +# XMM versions of above +XWORD0 = %xmm4 +XWORD1 = %xmm5 +XWORD2 = %xmm6 +XWORD3 = %xmm7 + +XTMP0 = %ymm0 +XTMP1 = %ymm1 +XTMP2 = %ymm2 +XTMP3 = %ymm3 +XTMP4 = %ymm8 +XFER = %ymm9 +XTMP5 = %ymm11 + +SHUF_00BA = %ymm10 # shuffle xBxA -> 00BA +SHUF_DC00 = %ymm12 # shuffle xDxC -> DC00 +BYTE_FLIP_MASK = %ymm13 + +X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK + +NUM_BLKS = %rdx # 3rd arg +INP = %rsi # 2nd arg +CTX = %rdi # 1st arg +c = %ecx +d = %r8d +e = %edx # clobbers NUM_BLKS +y3 = %esi # clobbers INP + +SRND = CTX # SRND is same register as CTX + +a = %eax +b = %ebx +f = %r9d +g = %r10d +h = %r11d +old_h = %r11d + +T1 = %r12d +y0 = %r13d +y1 = %r14d +y2 = %r15d + + +_XFER_SIZE = 2*64*4 # 2 blocks, 64 rounds, 4 bytes/round +_XMM_SAVE_SIZE = 0 +_INP_END_SIZE = 8 +_INP_SIZE = 8 +_CTX_SIZE = 8 + +_XFER = 0 +_XMM_SAVE = _XFER + _XFER_SIZE +_INP_END = _XMM_SAVE + _XMM_SAVE_SIZE +_INP = _INP_END + _INP_END_SIZE +_CTX = _INP + _INP_SIZE +STACK_SIZE = _CTX + _CTX_SIZE + +# rotate_Xs +# Rotate values of symbols X0...X3 +.macro rotate_Xs + X_ = X0 + X0 = X1 + X1 = X2 + X2 = X3 + X3 = X_ +.endm + +# ROTATE_ARGS +# Rotate values of symbols a...h +.macro ROTATE_ARGS + old_h = h + TMP_ = h + h = g + g = f + f = e + e = d + d = c + c = b + b = a + a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED disp +################################### RND N + 0 ############################ + + mov a, y3 # y3 = a # MAJA + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + + addl \disp(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7] + mov f, y2 # y2 = f # CH + rorx $13, a, T1 # T1 = a >> 13 # S0B + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + xor g, y2 # y2 = f^g # CH + vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]# y1 = (e >> 6)# S1 + rorx $6, e, y1 # y1 = (e >> 6) # S1 + + and e, y2 # y2 = (f^g)&e # CH + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $22, a, y1 # y1 = a >> 22 # S0A + add h, d # d = k + w + h + d # -- + + and b, y3 # y3 = (a|c)&b # MAJA + vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15] + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + vpsrld $7, XTMP1, XTMP2 + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + + add y0, y2 # y2 = S1 + CH # -- + vpslld $(32-7), XTMP1, XTMP3 + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 + + vpsrld $18, XTMP1, XTMP2 + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + + ROTATE_ARGS + +################################### RND N + 1 ############################ + + mov a, y3 # y3 = a # MAJA + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + offset = \disp + 1*4 + addl offset(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + + vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3 + mov f, y2 # y2 = f # CH + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + xor g, y2 # y2 = f^g # CH + + + rorx $6, e, y1 # y1 = (e >> 6) # S1 + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $22, a, y1 # y1 = a >> 22 # S0A + and e, y2 # y2 = (f^g)&e # CH + add h, d # d = k + w + h + d # -- + + vpslld $(32-18), XTMP1, XTMP1 + and b, y3 # y3 = (a|c)&b # MAJA + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + + vpxor XTMP1, XTMP3, XTMP3 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 ^ W[-15] ror 18 + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0 + vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA} + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0 + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA} + + + ROTATE_ARGS + +################################### RND N + 2 ############################ + + mov a, y3 # y3 = a # MAJA + rorx $25, e, y0 # y0 = e >> 25 # S1A + offset = \disp + 2*4 + addl offset(%rsp, SRND), h # h = k + w + h # -- + + vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA} + rorx $11, e, y1 # y1 = e >> 11 # S1B + or c, y3 # y3 = a|c # MAJA + mov f, y2 # y2 = f # CH + xor g, y2 # y2 = f^g # CH + + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA} + and e, y2 # y2 = (f^g)&e # CH + + rorx $6, e, y1 # y1 = (e >> 6) # S1 + vpxor XTMP3, XTMP2, XTMP2 + add h, d # d = k + w + h + d # -- + and b, y3 # y3 = (a|c)&b # MAJA + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $22, a, y1 # y1 = a >> 22 # S0A + vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA} + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA} + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a ,T1 # T1 = (a >> 2) # S0 + vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]} + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC} + + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1,h # h = k + w + h + S0 # -- + add y2,d # d = k + w + h + d + S1 + CH = d + t1 # -- + add y2,h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + + add y3,h # h = t1 + S0 + MAJ # -- + + + ROTATE_ARGS + +################################### RND N + 3 ############################ + + mov a, y3 # y3 = a # MAJA + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + offset = \disp + 3*4 + addl offset(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + + vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC} + mov f, y2 # y2 = f # CH + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + xor g, y2 # y2 = f^g # CH + + + vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC} + rorx $6, e, y1 # y1 = (e >> 6) # S1 + and e, y2 # y2 = (f^g)&e # CH + add h, d # d = k + w + h + d # -- + and b, y3 # y3 = (a|c)&b # MAJA + + vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC} + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + vpxor XTMP3, XTMP2, XTMP2 + rorx $22, a, y1 # y1 = a >> 22 # S0A + add y0, y2 # y2 = S1 + CH # -- + + vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC} + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + rorx $2, a, T1 # T1 = (a >> 2) # S0 + vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00} + + vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]} + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + + add y1, h # h = k + w + h + S0 # -- + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + ROTATE_ARGS + rotate_Xs +.endm + +.macro DO_4ROUNDS disp +################################### RND N + 0 ########################### + + mov f, y2 # y2 = f # CH + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + rorx $6, e, y1 # y1 = (e >> 6) # S1 + and e, y2 # y2 = (f^g)&e # CH + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $22, a, y1 # y1 = a >> 22 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + addl \disp(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + ROTATE_ARGS + +################################### RND N + 1 ########################### + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + rorx $6, e, y1 # y1 = (e >> 6) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $22, a, y1 # y1 = a >> 22 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + offset = 4*1 + \disp + addl offset(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + ROTATE_ARGS + +################################### RND N + 2 ############################## + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + rorx $6, e, y1 # y1 = (e >> 6) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $22, a, y1 # y1 = a >> 22 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + offset = 4*2 + \disp + addl offset(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + ROTATE_ARGS + +################################### RND N + 3 ########################### + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $25, e, y0 # y0 = e >> 25 # S1A + rorx $11, e, y1 # y1 = e >> 11 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 + rorx $6, e, y1 # y1 = (e >> 6) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 + rorx $13, a, T1 # T1 = a >> 13 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $22, a, y1 # y1 = a >> 22 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 + rorx $2, a, T1 # T1 = (a >> 2) # S0 + offset = 4*3 + \disp + addl offset(%rsp, SRND), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + + add y3, h # h = t1 + S0 + MAJ # -- + + ROTATE_ARGS + +.endm + +######################################################################## +## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state +## arg 2 : pointer to input data +## arg 3 : Num blocks +######################################################################## +.text +SYM_TYPED_FUNC_START(sha256_transform_rorx) +.align 32 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + push %rbp + mov %rsp, %rbp + + subq $STACK_SIZE, %rsp + and $-32, %rsp # align rsp to 32 byte boundary + + shl $6, NUM_BLKS # convert to bytes + jz done_hash + lea -64(INP, NUM_BLKS), NUM_BLKS # pointer to last block + mov NUM_BLKS, _INP_END(%rsp) + + cmp NUM_BLKS, INP + je only_one_block + + ## load initial digest + mov (CTX), a + mov 4*1(CTX), b + mov 4*2(CTX), c + mov 4*3(CTX), d + mov 4*4(CTX), e + mov 4*5(CTX), f + mov 4*6(CTX), g + mov 4*7(CTX), h + + vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK + vmovdqa _SHUF_00BA(%rip), SHUF_00BA + vmovdqa _SHUF_DC00(%rip), SHUF_DC00 + + mov CTX, _CTX(%rsp) + +loop0: + ## Load first 16 dwords from two blocks + VMOVDQ 0*32(INP),XTMP0 + VMOVDQ 1*32(INP),XTMP1 + VMOVDQ 2*32(INP),XTMP2 + VMOVDQ 3*32(INP),XTMP3 + + ## byte swap data + vpshufb BYTE_FLIP_MASK, XTMP0, XTMP0 + vpshufb BYTE_FLIP_MASK, XTMP1, XTMP1 + vpshufb BYTE_FLIP_MASK, XTMP2, XTMP2 + vpshufb BYTE_FLIP_MASK, XTMP3, XTMP3 + + ## transpose data into high/low halves + vperm2i128 $0x20, XTMP2, XTMP0, X0 + vperm2i128 $0x31, XTMP2, XTMP0, X1 + vperm2i128 $0x20, XTMP3, XTMP1, X2 + vperm2i128 $0x31, XTMP3, XTMP1, X3 + +last_block_enter: + add $64, INP + mov INP, _INP(%rsp) + + ## schedule 48 input dwords, by doing 3 rounds of 12 each + xor SRND, SRND + +.align 16 +loop1: + vpaddd K256+0*32(SRND), X0, XFER + vmovdqa XFER, 0*32+_XFER(%rsp, SRND) + FOUR_ROUNDS_AND_SCHED _XFER + 0*32 + + vpaddd K256+1*32(SRND), X0, XFER + vmovdqa XFER, 1*32+_XFER(%rsp, SRND) + FOUR_ROUNDS_AND_SCHED _XFER + 1*32 + + vpaddd K256+2*32(SRND), X0, XFER + vmovdqa XFER, 2*32+_XFER(%rsp, SRND) + FOUR_ROUNDS_AND_SCHED _XFER + 2*32 + + vpaddd K256+3*32(SRND), X0, XFER + vmovdqa XFER, 3*32+_XFER(%rsp, SRND) + FOUR_ROUNDS_AND_SCHED _XFER + 3*32 + + add $4*32, SRND + cmp $3*4*32, SRND + jb loop1 + +loop2: + ## Do last 16 rounds with no scheduling + vpaddd K256+0*32(SRND), X0, XFER + vmovdqa XFER, 0*32+_XFER(%rsp, SRND) + DO_4ROUNDS _XFER + 0*32 + + vpaddd K256+1*32(SRND), X1, XFER + vmovdqa XFER, 1*32+_XFER(%rsp, SRND) + DO_4ROUNDS _XFER + 1*32 + add $2*32, SRND + + vmovdqa X2, X0 + vmovdqa X3, X1 + + cmp $4*4*32, SRND + jb loop2 + + mov _CTX(%rsp), CTX + mov _INP(%rsp), INP + + addm (4*0)(CTX),a + addm (4*1)(CTX),b + addm (4*2)(CTX),c + addm (4*3)(CTX),d + addm (4*4)(CTX),e + addm (4*5)(CTX),f + addm (4*6)(CTX),g + addm (4*7)(CTX),h + + cmp _INP_END(%rsp), INP + ja done_hash + + #### Do second block using previously scheduled results + xor SRND, SRND +.align 16 +loop3: + DO_4ROUNDS _XFER + 0*32 + 16 + DO_4ROUNDS _XFER + 1*32 + 16 + add $2*32, SRND + cmp $4*4*32, SRND + jb loop3 + + mov _CTX(%rsp), CTX + mov _INP(%rsp), INP + add $64, INP + + addm (4*0)(CTX),a + addm (4*1)(CTX),b + addm (4*2)(CTX),c + addm (4*3)(CTX),d + addm (4*4)(CTX),e + addm (4*5)(CTX),f + addm (4*6)(CTX),g + addm (4*7)(CTX),h + + cmp _INP_END(%rsp), INP + jb loop0 + ja done_hash + +do_last_block: + VMOVDQ 0*16(INP),XWORD0 + VMOVDQ 1*16(INP),XWORD1 + VMOVDQ 2*16(INP),XWORD2 + VMOVDQ 3*16(INP),XWORD3 + + vpshufb X_BYTE_FLIP_MASK, XWORD0, XWORD0 + vpshufb X_BYTE_FLIP_MASK, XWORD1, XWORD1 + vpshufb X_BYTE_FLIP_MASK, XWORD2, XWORD2 + vpshufb X_BYTE_FLIP_MASK, XWORD3, XWORD3 + + jmp last_block_enter + +only_one_block: + + ## load initial digest + mov (4*0)(CTX),a + mov (4*1)(CTX),b + mov (4*2)(CTX),c + mov (4*3)(CTX),d + mov (4*4)(CTX),e + mov (4*5)(CTX),f + mov (4*6)(CTX),g + mov (4*7)(CTX),h + + vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK + vmovdqa _SHUF_00BA(%rip), SHUF_00BA + vmovdqa _SHUF_DC00(%rip), SHUF_DC00 + + mov CTX, _CTX(%rsp) + jmp do_last_block + +done_hash: + + mov %rbp, %rsp + pop %rbp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + RET +SYM_FUNC_END(sha256_transform_rorx) + +.section .rodata.cst512.K256, "aM", @progbits, 512 +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 + +# shuffle xBxA -> 00BA +.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32 +.align 32 +_SHUF_00BA: + .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 + +# shuffle xDxC -> DC00 +.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32 +.align 32 +_SHUF_DC00: + .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S new file mode 100644 index 000000000..7db288391 --- /dev/null +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -0,0 +1,514 @@ +######################################################################## +# Implement fast SHA-256 with SSSE3 instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-256 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## + +#include +#include + +## assume buffers not aligned +#define MOVDQ movdqu + +################################ Define Macros + +# addm [mem], reg +# Add reg to mem using reg-mem add and store +.macro addm p1 p2 + add \p1, \p2 + mov \p2, \p1 +.endm + +################################ + +# COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask +# Load xmm with mem and byte swap each dword +.macro COPY_XMM_AND_BSWAP p1 p2 p3 + MOVDQ \p2, \p1 + pshufb \p3, \p1 +.endm + +################################ + +X0 = %xmm4 +X1 = %xmm5 +X2 = %xmm6 +X3 = %xmm7 + +XTMP0 = %xmm0 +XTMP1 = %xmm1 +XTMP2 = %xmm2 +XTMP3 = %xmm3 +XTMP4 = %xmm8 +XFER = %xmm9 + +SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA +SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00 +BYTE_FLIP_MASK = %xmm12 + +NUM_BLKS = %rdx # 3rd arg +INP = %rsi # 2nd arg +CTX = %rdi # 1st arg + +SRND = %rsi # clobbers INP +c = %ecx +d = %r8d +e = %edx +TBL = %r12 +a = %eax +b = %ebx + +f = %r9d +g = %r10d +h = %r11d + +y0 = %r13d +y1 = %r14d +y2 = %r15d + + + +_INP_END_SIZE = 8 +_INP_SIZE = 8 +_XFER_SIZE = 16 +_XMM_SAVE_SIZE = 0 + +_INP_END = 0 +_INP = _INP_END + _INP_END_SIZE +_XFER = _INP + _INP_SIZE +_XMM_SAVE = _XFER + _XFER_SIZE +STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE + +# rotate_Xs +# Rotate values of symbols X0...X3 +.macro rotate_Xs +X_ = X0 +X0 = X1 +X1 = X2 +X2 = X3 +X3 = X_ +.endm + +# ROTATE_ARGS +# Rotate values of symbols a...h +.macro ROTATE_ARGS +TMP_ = h +h = g +g = f +f = e +e = d +d = c +c = b +b = a +a = TMP_ +.endm + +.macro FOUR_ROUNDS_AND_SCHED + ## compute s0 four at a time and s1 two at a time + ## compute W[-16] + W[-7] 4 at a time + movdqa X3, XTMP0 + mov e, y0 # y0 = e + ror $(25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + palignr $4, X2, XTMP0 # XTMP0 = W[-7] + ror $(22-13), y1 # y1 = a >> (22-13) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + movdqa X1, XTMP1 + xor a, y1 # y1 = a ^ (a >> (22-13) + xor g, y2 # y2 = f^g + paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16] + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + ## compute s0 + palignr $4, X0, XTMP1 # XTMP1 = W[-15] + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor g, y2 # y2 = CH = ((f^g)&e)^g + movdqa XTMP1, XTMP2 # XTMP2 = W[-15] + ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y0, y2 # y2 = S1 + CH + add _XFER(%rsp) , y2 # y2 = k + w + S1 + CH + movdqa XTMP1, XTMP3 # XTMP3 = W[-15] + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + pslld $(32-7), XTMP1 # + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + psrld $7, XTMP2 # + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + # + ROTATE_ARGS # + movdqa XTMP3, XTMP2 # XTMP2 = W[-15] + mov e, y0 # y0 = e + mov a, y1 # y1 = a + movdqa XTMP3, XTMP4 # XTMP4 = W[-15] + ror $(25-11), y0 # y0 = e >> (25-11) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + ror $(22-13), y1 # y1 = a >> (22-13) + pslld $(32-18), XTMP3 # + xor a, y1 # y1 = a ^ (a >> (22-13) + ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor g, y2 # y2 = f^g + psrld $18, XTMP2 # + ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + pxor XTMP3, XTMP1 + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor g, y2 # y2 = CH = ((f^g)&e)^g + psrld $3, XTMP4 # XTMP4 = W[-15] >> 3 + add y0, y2 # y2 = S1 + CH + add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + pxor XTMP4, XTMP1 # XTMP1 = s0 + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + ## compute low s1 + pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + + ROTATE_ARGS + movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA} + mov e, y0 # y0 = e + mov a, y1 # y1 = a + ror $(25-11), y0 # y0 = e >> (25-11) + movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA} + xor e, y0 # y0 = e ^ (e >> (25-11)) + ror $(22-13), y1 # y1 = a >> (22-13) + mov f, y2 # y2 = f + xor a, y1 # y1 = a ^ (a >> (22-13) + ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA} + xor g, y2 # y2 = f^g + psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA} + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and e, y2 # y2 = (f^g)&e + psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA} + ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor g, y2 # y2 = CH = ((f^g)&e)^g + ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + pxor XTMP3, XTMP2 + add y0, y2 # y2 = S1 + CH + ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + pxor XTMP2, XTMP4 # XTMP4 = s1 {xBxA} + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + pshufb SHUF_00BA, XTMP4 # XTMP4 = s1 {00BA} + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + paddd XTMP4, XTMP0 # XTMP0 = {..., ..., W[1], W[0]} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + ## compute high s1 + pshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {BBAA} + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + # + ROTATE_ARGS # + movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC} + mov e, y0 # y0 = e + ror $(25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + movdqa XTMP2, X0 # X0 = W[-2] {DDCC} + ror $(22-13), y1 # y1 = a >> (22-13) + xor e, y0 # y0 = e ^ (e >> (25-11)) + mov f, y2 # y2 = f + ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC} + xor a, y1 # y1 = a ^ (a >> (22-13) + xor g, y2 # y2 = f^g + psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC} + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25 + and e, y2 # y2 = (f^g)&e + ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + psrld $10, X0 # X0 = W[-2] >> 10 {DDCC} + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22 + ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>2 + xor g, y2 # y2 = CH = ((f^g)&e)^g + pxor XTMP3, XTMP2 # + ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>2 + add y0, y2 # y2 = S1 + CH + add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH + pxor XTMP2, X0 # X0 = s1 {xDxC} + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + pshufb SHUF_DC00, X0 # X0 = s1 {DC00} + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + paddd XTMP0, X0 # X0 = {W[3], W[2], W[1], W[0]} + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + + ROTATE_ARGS + rotate_Xs +.endm + +## input is [rsp + _XFER + %1 * 4] +.macro DO_ROUND round + mov e, y0 # y0 = e + ror $(25-11), y0 # y0 = e >> (25-11) + mov a, y1 # y1 = a + xor e, y0 # y0 = e ^ (e >> (25-11)) + ror $(22-13), y1 # y1 = a >> (22-13) + mov f, y2 # y2 = f + xor a, y1 # y1 = a ^ (a >> (22-13) + ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor g, y2 # y2 = f^g + xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) + and e, y2 # y2 = (f^g)&e + xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ror $6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor g, y2 # y2 = CH = ((f^g)&e)^g + add y0, y2 # y2 = S1 + CH + ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + offset = \round * 4 + _XFER + add offset(%rsp), y2 # y2 = k + w + S1 + CH + mov a, y0 # y0 = a + add y2, h # h = h + S1 + CH + k + w + mov a, y2 # y2 = a + or c, y0 # y0 = a|c + add h, d # d = d + h + S1 + CH + k + w + and c, y2 # y2 = a&c + and b, y0 # y0 = (a|c)&b + add y1, h # h = h + S1 + CH + k + w + S0 + or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) + add y0, h # h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS +.endm + +######################################################################## +## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, +## int blocks); +## arg 1 : pointer to state +## (struct sha256_state is assumed to begin with u32 state[8]) +## arg 2 : pointer to input data +## arg 3 : Num blocks +######################################################################## +.text +SYM_TYPED_FUNC_START(sha256_transform_ssse3) +.align 32 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + mov %rsp, %rbp + + subq $STACK_SIZE, %rsp + and $~15, %rsp + + shl $6, NUM_BLKS # convert to bytes + jz done_hash + add INP, NUM_BLKS + mov NUM_BLKS, _INP_END(%rsp) # pointer to end of data + + ## load initial digest + mov 4*0(CTX), a + mov 4*1(CTX), b + mov 4*2(CTX), c + mov 4*3(CTX), d + mov 4*4(CTX), e + mov 4*5(CTX), f + mov 4*6(CTX), g + mov 4*7(CTX), h + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK + movdqa _SHUF_00BA(%rip), SHUF_00BA + movdqa _SHUF_DC00(%rip), SHUF_DC00 + +loop0: + lea K256(%rip), TBL + + ## byte swap first 16 dwords + COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK + COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK + + mov INP, _INP(%rsp) + + ## schedule 48 input dwords, by doing 3 rounds of 16 each + mov $3, SRND +.align 16 +loop1: + movdqa (TBL), XFER + paddd X0, XFER + movdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + movdqa 1*16(TBL), XFER + paddd X0, XFER + movdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + movdqa 2*16(TBL), XFER + paddd X0, XFER + movdqa XFER, _XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + movdqa 3*16(TBL), XFER + paddd X0, XFER + movdqa XFER, _XFER(%rsp) + add $4*16, TBL + FOUR_ROUNDS_AND_SCHED + + sub $1, SRND + jne loop1 + + mov $2, SRND +loop2: + paddd (TBL), X0 + movdqa X0, _XFER(%rsp) + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + paddd 1*16(TBL), X1 + movdqa X1, _XFER(%rsp) + add $2*16, TBL + DO_ROUND 0 + DO_ROUND 1 + DO_ROUND 2 + DO_ROUND 3 + + movdqa X2, X0 + movdqa X3, X1 + + sub $1, SRND + jne loop2 + + addm (4*0)(CTX),a + addm (4*1)(CTX),b + addm (4*2)(CTX),c + addm (4*3)(CTX),d + addm (4*4)(CTX),e + addm (4*5)(CTX),f + addm (4*6)(CTX),g + addm (4*7)(CTX),h + + mov _INP(%rsp), INP + add $64, INP + cmp _INP_END(%rsp), INP + jne loop0 + +done_hash: + + mov %rbp, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + + RET +SYM_FUNC_END(sha256_transform_ssse3) + +.section .rodata.cst256.K256, "aM", @progbits, 256 +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 + +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 +# shuffle xBxA -> 00BA +_SHUF_00BA: + .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 + +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 +# shuffle xDxC -> DC00 +_SHUF_DC00: + .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S new file mode 100644 index 000000000..47f93937f --- /dev/null +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -0,0 +1,356 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define SHA256CONSTANTS %rax + +#define MSG %xmm0 +#define STATE0 %xmm1 +#define STATE1 %xmm2 +#define MSGTMP0 %xmm3 +#define MSGTMP1 %xmm4 +#define MSGTMP2 %xmm5 +#define MSGTMP3 %xmm6 +#define MSGTMP4 %xmm7 + +#define SHUF_MASK %xmm8 + +#define ABEF_SAVE %xmm9 +#define CDGH_SAVE %xmm10 + +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha256_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks); + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ + +.text +.align 32 +SYM_TYPED_FUNC_START(sha256_ni_transform) + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* + * load initial hash values + * Need to reorder these appropriately + * DCBA, HGFE -> ABEF, CDGH + */ + movdqu 0*16(DIGEST_PTR), STATE0 + movdqu 1*16(DIGEST_PTR), STATE1 + + pshufd $0xB1, STATE0, STATE0 /* CDAB */ + pshufd $0x1B, STATE1, STATE1 /* EFGH */ + movdqa STATE0, MSGTMP4 + palignr $8, STATE1, STATE0 /* ABEF */ + pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + lea K256(%rip), SHA256CONSTANTS + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa STATE0, ABEF_SAVE + movdqa STATE1, CDGH_SAVE + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP0 + paddd 0*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP1 + paddd 1*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP2 + paddd 2*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP3 + paddd 3*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 16-19 */ + movdqa MSGTMP0, MSG + paddd 4*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 20-23 */ + movdqa MSGTMP1, MSG + paddd 5*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 24-27 */ + movdqa MSGTMP2, MSG + paddd 6*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 28-31 */ + movdqa MSGTMP3, MSG + paddd 7*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 32-35 */ + movdqa MSGTMP0, MSG + paddd 8*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 36-39 */ + movdqa MSGTMP1, MSG + paddd 9*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 40-43 */ + movdqa MSGTMP2, MSG + paddd 10*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 44-47 */ + movdqa MSGTMP3, MSG + paddd 11*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 48-51 */ + movdqa MSGTMP0, MSG + paddd 12*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 52-55 */ + movdqa MSGTMP1, MSG + paddd 13*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 56-59 */ + movdqa MSGTMP2, MSG + paddd 14*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 60-63 */ + movdqa MSGTMP3, MSG + paddd 15*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Add current hash values with previously saved */ + paddd ABEF_SAVE, STATE0 + paddd CDGH_SAVE, STATE1 + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, STATE0, STATE0 /* FEBA */ + pshufd $0xB1, STATE1, STATE1 /* DCHG */ + movdqa STATE0, MSGTMP4 + pblendw $0xF0, STATE1, STATE0 /* DCBA */ + palignr $8, MSGTMP4, STATE1 /* HGFE */ + + movdqu STATE0, 0*16(DIGEST_PTR) + movdqu STATE1, 1*16(DIGEST_PTR) + +.Ldone_hash: + + RET +SYM_FUNC_END(sha256_ni_transform) + +.section .rodata.cst256.K256, "aM", @progbits, 256 +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c new file mode 100644 index 000000000..d25235f0c --- /dev/null +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -0,0 +1,432 @@ +/* + * Cryptographic API. + * + * Glue code for the SHA256 Secure Hash Algorithm assembler + * implementation using supplemental SSE3 / AVX / AVX2 instructions. + * + * This file is based on sha256_generic.c + * + * Copyright (C) 2013 Intel Corporation. + * + * Author: + * Tim Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void sha256_transform_ssse3(struct sha256_state *state, + const u8 *data, int blocks); + +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); + +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *sha256_xform) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) + return crypto_sha256_update(desc, data, len); + + /* + * Make sure struct sha256_state begins directly with the SHA256 + * 256-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); + + kernel_fpu_begin(); + sha256_base_do_update(desc, data, len, sha256_xform); + kernel_fpu_end(); + + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha256_block_fn *sha256_xform) +{ + if (!crypto_simd_usable()) + return crypto_sha256_finup(desc, data, len, out); + + kernel_fpu_begin(); + if (len) + sha256_base_do_update(desc, data, len, sha256_xform); + sha256_base_do_finalize(desc, sha256_xform); + kernel_fpu_end(); + + return sha256_base_finish(desc, out); +} + +static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return _sha256_update(desc, data, len, sha256_transform_ssse3); +} + +static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_ssse3); +} + +/* Add padding and return the message digest. */ +static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) +{ + return sha256_ssse3_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_ssse3_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_ssse3_update, + .final = sha256_ssse3_final, + .finup = sha256_ssse3_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ssse3", + .cra_priority = 150, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_ssse3_update, + .final = sha256_ssse3_final, + .finup = sha256_ssse3_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ssse3", + .cra_priority = 150, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int register_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); + return 0; +} + +static void unregister_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); +} + +asmlinkage void sha256_transform_avx(struct sha256_state *state, + const u8 *data, int blocks); + +static int sha256_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return _sha256_update(desc, data, len, sha256_transform_avx); +} + +static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_avx); +} + +static int sha256_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx", + .cra_priority = 160, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx", + .cra_priority = 160, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx_usable(void) +{ + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + if (boot_cpu_has(X86_FEATURE_AVX)) + pr_info("AVX detected but unusable.\n"); + return false; + } + + return true; +} + +static int register_sha256_avx(void) +{ + if (avx_usable()) + return crypto_register_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); + return 0; +} + +static void unregister_sha256_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); +} + +asmlinkage void sha256_transform_rorx(struct sha256_state *state, + const u8 *data, int blocks); + +static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return _sha256_update(desc, data, len, sha256_transform_rorx); +} + +static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_rorx); +} + +static int sha256_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx2_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx2", + .cra_priority = 170, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx2", + .cra_priority = 170, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha256_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); + return 0; +} + +static void unregister_sha256_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); +} + +#ifdef CONFIG_AS_SHA256_NI +asmlinkage void sha256_ni_transform(struct sha256_state *digest, + const u8 *data, int rounds); + +static int sha256_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return _sha256_update(desc, data, len, sha256_ni_transform); +} + +static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_ni_transform); +} + +static int sha256_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha256_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_ni_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ni", + .cra_priority = 250, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ni", + .cra_priority = 250, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int register_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); + return 0; +} + +static void unregister_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); +} + +#else +static inline int register_sha256_ni(void) { return 0; } +static inline void unregister_sha256_ni(void) { } +#endif + +static int __init sha256_ssse3_mod_init(void) +{ + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; + + if (register_sha256_ssse3()) + goto fail; + + if (register_sha256_avx()) { + unregister_sha256_ssse3(); + goto fail; + } + + if (register_sha256_avx2()) { + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + if (register_sha256_ni()) { + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + return 0; +fail: + return -ENODEV; +} + +static void __exit sha256_ssse3_mod_fini(void) +{ + unregister_sha256_ni(); + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); +} + +module_init(sha256_ssse3_mod_init); +module_exit(sha256_ssse3_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); + +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-ssse3"); +MODULE_ALIAS_CRYPTO("sha256-avx"); +MODULE_ALIAS_CRYPTO("sha256-avx2"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-ssse3"); +MODULE_ALIAS_CRYPTO("sha224-avx"); +MODULE_ALIAS_CRYPTO("sha224-avx2"); +#ifdef CONFIG_AS_SHA256_NI +MODULE_ALIAS_CRYPTO("sha256-ni"); +MODULE_ALIAS_CRYPTO("sha224-ni"); +#endif diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S new file mode 100644 index 000000000..b0984f19f --- /dev/null +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -0,0 +1,423 @@ +######################################################################## +# Implement fast SHA-512 with AVX instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# David Cote +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-512 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## + +#include +#include + +.text + +# Virtual Registers +# ARG1 +digest = %rdi +# ARG2 +msg = %rsi +# ARG3 +msglen = %rdx +T1 = %rcx +T2 = %r8 +a_64 = %r9 +b_64 = %r10 +c_64 = %r11 +d_64 = %r12 +e_64 = %r13 +f_64 = %r14 +g_64 = %r15 +h_64 = %rbx +tmp0 = %rax + +# Local variables (stack frame) + +# Message Schedule +W_SIZE = 80*8 +# W[t] + K[t] | W[t+1] + K[t+1] +WK_SIZE = 2*8 + +frame_W = 0 +frame_WK = frame_W + W_SIZE +frame_size = frame_WK + WK_SIZE + +# Useful QWORD "arrays" for simpler memory references +# MSG, DIGEST, K_t, W_t are arrays +# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even + +# Input message (arg1) +#define MSG(i) 8*i(msg) + +# Output Digest (arg2) +#define DIGEST(i) 8*i(digest) + +# SHA Constants (static mem) +#define K_t(i) 8*i+K512(%rip) + +# Message Schedule (stack frame) +#define W_t(i) 8*i+frame_W(%rsp) + +# W[t]+K[t] (stack frame) +#define WK_2(i) 8*((i%2))+frame_WK(%rsp) + +.macro RotateState + # Rotate symbols a..h right + TMP = h_64 + h_64 = g_64 + g_64 = f_64 + f_64 = e_64 + e_64 = d_64 + d_64 = c_64 + c_64 = b_64 + b_64 = a_64 + a_64 = TMP +.endm + +.macro RORQ p1 p2 + # shld is faster than ror on Sandybridge + shld $(64-\p2), \p1, \p1 +.endm + +.macro SHA512_Round rnd + # Compute Round %%t + mov f_64, T1 # T1 = f + mov e_64, tmp0 # tmp = e + xor g_64, T1 # T1 = f ^ g + RORQ tmp0, 23 # 41 # tmp = e ror 23 + and e_64, T1 # T1 = (f ^ g) & e + xor e_64, tmp0 # tmp = (e ror 23) ^ e + xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) + idx = \rnd + add WK_2(idx), T1 # W[t] + K[t] from message scheduler + RORQ tmp0, 4 # 18 # tmp = ((e ror 23) ^ e) ror 4 + xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e + mov a_64, T2 # T2 = a + add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h + RORQ tmp0, 14 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) + add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) + mov a_64, tmp0 # tmp = a + xor c_64, T2 # T2 = a ^ c + and c_64, tmp0 # tmp = a & c + and b_64, T2 # T2 = (a ^ c) & b + xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) + mov a_64, tmp0 # tmp = a + RORQ tmp0, 5 # 39 # tmp = a ror 5 + xor a_64, tmp0 # tmp = (a ror 5) ^ a + add T1, d_64 # e(next_state) = d + T1 + RORQ tmp0, 6 # 34 # tmp = ((a ror 5) ^ a) ror 6 + xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a + lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) + RORQ tmp0, 28 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) + add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) + RotateState +.endm + +.macro SHA512_2Sched_2Round_avx rnd + # Compute rounds t-2 and t-1 + # Compute message schedule QWORDS t and t+1 + + # Two rounds are computed based on the values for K[t-2]+W[t-2] and + # K[t-1]+W[t-1] which were previously stored at WK_2 by the message + # scheduler. + # The two new schedule QWORDS are stored at [W_t(t)] and [W_t(t+1)]. + # They are then added to their respective SHA512 constants at + # [K_t(t)] and [K_t(t+1)] and stored at dqword [WK_2(t)] + # For brievity, the comments following vectored instructions only refer to + # the first of a pair of QWORDS. + # Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} + # The computation of the message schedule and the rounds are tightly + # stitched to take advantage of instruction-level parallelism. + + idx = \rnd - 2 + vmovdqa W_t(idx), %xmm4 # XMM4 = W[t-2] + idx = \rnd - 15 + vmovdqu W_t(idx), %xmm5 # XMM5 = W[t-15] + mov f_64, T1 + vpsrlq $61, %xmm4, %xmm0 # XMM0 = W[t-2]>>61 + mov e_64, tmp0 + vpsrlq $1, %xmm5, %xmm6 # XMM6 = W[t-15]>>1 + xor g_64, T1 + RORQ tmp0, 23 # 41 + vpsrlq $19, %xmm4, %xmm1 # XMM1 = W[t-2]>>19 + and e_64, T1 + xor e_64, tmp0 + vpxor %xmm1, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 + xor g_64, T1 + idx = \rnd + add WK_2(idx), T1# + vpsrlq $8, %xmm5, %xmm7 # XMM7 = W[t-15]>>8 + RORQ tmp0, 4 # 18 + vpsrlq $6, %xmm4, %xmm2 # XMM2 = W[t-2]>>6 + xor e_64, tmp0 + mov a_64, T2 + add h_64, T1 + vpxor %xmm7, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 + RORQ tmp0, 14 # 14 + add tmp0, T1 + vpsrlq $7, %xmm5, %xmm8 # XMM8 = W[t-15]>>7 + mov a_64, tmp0 + xor c_64, T2 + vpsllq $(64-61), %xmm4, %xmm3 # XMM3 = W[t-2]<<3 + and c_64, tmp0 + and b_64, T2 + vpxor %xmm3, %xmm2, %xmm2 # XMM2 = W[t-2]>>6 ^ W[t-2]<<3 + xor tmp0, T2 + mov a_64, tmp0 + vpsllq $(64-1), %xmm5, %xmm9 # XMM9 = W[t-15]<<63 + RORQ tmp0, 5 # 39 + vpxor %xmm9, %xmm8, %xmm8 # XMM8 = W[t-15]>>7 ^ W[t-15]<<63 + xor a_64, tmp0 + add T1, d_64 + RORQ tmp0, 6 # 34 + xor a_64, tmp0 + vpxor %xmm8, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ + # W[t-15]>>7 ^ W[t-15]<<63 + lea (T1, T2), h_64 + RORQ tmp0, 28 # 28 + vpsllq $(64-19), %xmm4, %xmm4 # XMM4 = W[t-2]<<25 + add tmp0, h_64 + RotateState + vpxor %xmm4, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ + # W[t-2]<<25 + mov f_64, T1 + vpxor %xmm2, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + mov e_64, tmp0 + xor g_64, T1 + idx = \rnd - 16 + vpaddq W_t(idx), %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + idx = \rnd - 7 + vmovdqu W_t(idx), %xmm1 # XMM1 = W[t-7] + RORQ tmp0, 23 # 41 + and e_64, T1 + xor e_64, tmp0 + xor g_64, T1 + vpsllq $(64-8), %xmm5, %xmm5 # XMM5 = W[t-15]<<56 + idx = \rnd + 1 + add WK_2(idx), T1 + vpxor %xmm5, %xmm6, %xmm6 # XMM6 = s0(W[t-15]) + RORQ tmp0, 4 # 18 + vpaddq %xmm6, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) + xor e_64, tmp0 + vpaddq %xmm1, %xmm0, %xmm0 # XMM0 = W[t] = s1(W[t-2]) + W[t-7] + + # s0(W[t-15]) + W[t-16] + mov a_64, T2 + add h_64, T1 + RORQ tmp0, 14 # 14 + add tmp0, T1 + idx = \rnd + vmovdqa %xmm0, W_t(idx) # Store W[t] + vpaddq K_t(idx), %xmm0, %xmm0 # Compute W[t]+K[t] + vmovdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds + mov a_64, tmp0 + xor c_64, T2 + and c_64, tmp0 + and b_64, T2 + xor tmp0, T2 + mov a_64, tmp0 + RORQ tmp0, 5 # 39 + xor a_64, tmp0 + add T1, d_64 + RORQ tmp0, 6 # 34 + xor a_64, tmp0 + lea (T1, T2), h_64 + RORQ tmp0, 28 # 28 + add tmp0, h_64 + RotateState +.endm + +######################################################################## +# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks +######################################################################## +SYM_TYPED_FUNC_START(sha512_transform_avx) + test msglen, msglen + je nowork + + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + # Allocate Stack Space + push %rbp + mov %rsp, %rbp + sub $frame_size, %rsp + and $~(0x20 - 1), %rsp + +updateblock: + + # Load state variables + mov DIGEST(0), a_64 + mov DIGEST(1), b_64 + mov DIGEST(2), c_64 + mov DIGEST(3), d_64 + mov DIGEST(4), e_64 + mov DIGEST(5), f_64 + mov DIGEST(6), g_64 + mov DIGEST(7), h_64 + + t = 0 + .rept 80/2 + 1 + # (80 rounds) / (2 rounds/iteration) + (1 iteration) + # +1 iteration because the scheduler leads hashing by 1 iteration + .if t < 2 + # BSWAP 2 QWORDS + vmovdqa XMM_QWORD_BSWAP(%rip), %xmm1 + vmovdqu MSG(t), %xmm0 + vpshufb %xmm1, %xmm0, %xmm0 # BSWAP + vmovdqa %xmm0, W_t(t) # Store Scheduled Pair + vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] + vmovdqa %xmm0, WK_2(t) # Store into WK for rounds + .elseif t < 16 + # BSWAP 2 QWORDS# Compute 2 Rounds + vmovdqu MSG(t), %xmm0 + vpshufb %xmm1, %xmm0, %xmm0 # BSWAP + SHA512_Round t-2 # Round t-2 + vmovdqa %xmm0, W_t(t) # Store Scheduled Pair + vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] + SHA512_Round t-1 # Round t-1 + vmovdqa %xmm0, WK_2(t)# Store W[t]+K[t] into WK + .elseif t < 79 + # Schedule 2 QWORDS# Compute 2 Rounds + SHA512_2Sched_2Round_avx t + .else + # Compute 2 Rounds + SHA512_Round t-2 + SHA512_Round t-1 + .endif + t = t+2 + .endr + + # Update digest + add a_64, DIGEST(0) + add b_64, DIGEST(1) + add c_64, DIGEST(2) + add d_64, DIGEST(3) + add e_64, DIGEST(4) + add f_64, DIGEST(5) + add g_64, DIGEST(6) + add h_64, DIGEST(7) + + # Advance to next message block + add $16*8, msg + dec msglen + jnz updateblock + + # Restore Stack Pointer + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + +nowork: + RET +SYM_FUNC_END(sha512_transform_avx) + +######################################################################## +### Binary Data + +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 +.align 16 +# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. +XMM_QWORD_BSWAP: + .octa 0x08090a0b0c0d0e0f0001020304050607 + +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 +# K[t] used in SHA512 hashing +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S new file mode 100644 index 000000000..b1ca99055 --- /dev/null +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -0,0 +1,749 @@ +######################################################################## +# Implement fast SHA-512 with AVX2 instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# David Cote +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-512 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## +# This code schedules 1 blocks at a time, with 4 lanes per block +######################################################################## + +#include +#include + +.text + +# Virtual Registers +Y_0 = %ymm4 +Y_1 = %ymm5 +Y_2 = %ymm6 +Y_3 = %ymm7 + +YTMP0 = %ymm0 +YTMP1 = %ymm1 +YTMP2 = %ymm2 +YTMP3 = %ymm3 +YTMP4 = %ymm8 +XFER = YTMP0 + +BYTE_FLIP_MASK = %ymm9 + +# 1st arg is %rdi, which is saved to the stack and accessed later via %r12 +CTX1 = %rdi +CTX2 = %r12 +# 2nd arg +INP = %rsi +# 3rd arg +NUM_BLKS = %rdx + +c = %rcx +d = %r8 +e = %rdx +y3 = %rsi + +TBL = %rdi # clobbers CTX1 + +a = %rax +b = %rbx + +f = %r9 +g = %r10 +h = %r11 +old_h = %r11 + +T1 = %r12 # clobbers CTX2 +y0 = %r13 +y1 = %r14 +y2 = %r15 + +# Local variables (stack frame) +XFER_SIZE = 4*8 +SRND_SIZE = 1*8 +INP_SIZE = 1*8 +INPEND_SIZE = 1*8 +CTX_SIZE = 1*8 + +frame_XFER = 0 +frame_SRND = frame_XFER + XFER_SIZE +frame_INP = frame_SRND + SRND_SIZE +frame_INPEND = frame_INP + INP_SIZE +frame_CTX = frame_INPEND + INPEND_SIZE +frame_size = frame_CTX + CTX_SIZE + +## assume buffers not aligned +#define VMOVDQ vmovdqu + +# addm [mem], reg +# Add reg to mem using reg-mem add and store +.macro addm p1 p2 + add \p1, \p2 + mov \p2, \p1 +.endm + + +# COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask +# Load ymm with mem and byte swap each dword +.macro COPY_YMM_AND_BSWAP p1 p2 p3 + VMOVDQ \p2, \p1 + vpshufb \p3, \p1, \p1 +.endm +# rotate_Ys +# Rotate values of symbols Y0...Y3 +.macro rotate_Ys + Y_ = Y_0 + Y_0 = Y_1 + Y_1 = Y_2 + Y_2 = Y_3 + Y_3 = Y_ +.endm + +# RotateState +.macro RotateState + # Rotate symbols a..h right + old_h = h + TMP_ = h + h = g + g = f + f = e + e = d + d = c + c = b + b = a + a = TMP_ +.endm + +# macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL +# YDST = {YSRC1, YSRC2} >> RVAL*8 +.macro MY_VPALIGNR YDST YSRC1 YSRC2 RVAL + vperm2f128 $0x3, \YSRC2, \YSRC1, \YDST # YDST = {YS1_LO, YS2_HI} + vpalignr $\RVAL, \YSRC2, \YDST, \YDST # YDST = {YDS1, YS2} >> RVAL*8 +.endm + +.macro FOUR_ROUNDS_AND_SCHED +################################### RND N + 0 ######################################### + + # Extract w[t-7] + MY_VPALIGNR YTMP0, Y_3, Y_2, 8 # YTMP0 = W[-7] + # Calculate w[t-16] + w[t-7] + vpaddq Y_0, YTMP0, YTMP0 # YTMP0 = W[-7] + W[-16] + # Extract w[t-15] + MY_VPALIGNR YTMP1, Y_1, Y_0, 8 # YTMP1 = W[-15] + + # Calculate sigma0 + + # Calculate w[t-15] ror 1 + vpsrlq $1, YTMP1, YTMP2 + vpsllq $(64-1), YTMP1, YTMP3 + vpor YTMP2, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 + # Calculate w[t-15] shr 7 + vpsrlq $7, YTMP1, YTMP4 # YTMP4 = W[-15] >> 7 + + mov a, y3 # y3 = a # MAJA + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + add frame_XFER(%rsp),h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + mov f, y2 # y2 = f # CH + rorx $34, a, T1 # T1 = a >> 34 # S0B + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + xor g, y2 # y2 = f^g # CH + rorx $14, e, y1 # y1 = (e >> 14) # S1 + + and e, y2 # y2 = (f^g)&e # CH + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $39, a, y1 # y1 = a >> 39 # S0A + add h, d # d = k + w + h + d # -- + + and b, y3 # y3 = (a|c)&b # MAJA + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + + add y0, y2 # y2 = S1 + CH # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + RotateState + +################################### RND N + 1 ######################################### + + # Calculate w[t-15] ror 8 + vpsrlq $8, YTMP1, YTMP2 + vpsllq $(64-8), YTMP1, YTMP1 + vpor YTMP2, YTMP1, YTMP1 # YTMP1 = W[-15] ror 8 + # XOR the three components + vpxor YTMP4, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 ^ W[-15] >> 7 + vpxor YTMP1, YTMP3, YTMP1 # YTMP1 = s0 + + + # Add three components, w[t-16], w[t-7] and sigma0 + vpaddq YTMP1, YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 + # Move to appropriate lanes for calculating w[16] and w[17] + vperm2f128 $0x0, YTMP0, YTMP0, Y_0 # Y_0 = W[-16] + W[-7] + s0 {BABA} + # Move to appropriate lanes for calculating w[18] and w[19] + vpand MASK_YMM_LO(%rip), YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 {DC00} + + # Calculate w[16] and w[17] in both 128 bit lanes + + # Calculate sigma1 for w[16] and w[17] on both 128 bit lanes + vperm2f128 $0x11, Y_3, Y_3, YTMP2 # YTMP2 = W[-2] {BABA} + vpsrlq $6, YTMP2, YTMP4 # YTMP4 = W[-2] >> 6 {BABA} + + + mov a, y3 # y3 = a # MAJA + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + add 1*8+frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + + mov f, y2 # y2 = f # CH + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + xor g, y2 # y2 = f^g # CH + + + rorx $14, e, y1 # y1 = (e >> 14) # S1 + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $39, a, y1 # y1 = a >> 39 # S0A + and e, y2 # y2 = (f^g)&e # CH + add h, d # d = k + w + h + d # -- + + and b, y3 # y3 = (a|c)&b # MAJA + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + + rorx $28, a, T1 # T1 = (a >> 28) # S0 + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + RotateState + + +################################### RND N + 2 ######################################### + + vpsrlq $19, YTMP2, YTMP3 # YTMP3 = W[-2] >> 19 {BABA} + vpsllq $(64-19), YTMP2, YTMP1 # YTMP1 = W[-2] << 19 {BABA} + vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {BABA} + vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} + vpsrlq $61, YTMP2, YTMP3 # YTMP3 = W[-2] >> 61 {BABA} + vpsllq $(64-61), YTMP2, YTMP1 # YTMP1 = W[-2] << 61 {BABA} + vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {BABA} + vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ + # (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} + + # Add sigma1 to the other compunents to get w[16] and w[17] + vpaddq YTMP4, Y_0, Y_0 # Y_0 = {W[1], W[0], W[1], W[0]} + + # Calculate sigma1 for w[18] and w[19] for upper 128 bit lane + vpsrlq $6, Y_0, YTMP4 # YTMP4 = W[-2] >> 6 {DC--} + + mov a, y3 # y3 = a # MAJA + rorx $41, e, y0 # y0 = e >> 41 # S1A + add 2*8+frame_XFER(%rsp), h # h = k + w + h # -- + + rorx $18, e, y1 # y1 = e >> 18 # S1B + or c, y3 # y3 = a|c # MAJA + mov f, y2 # y2 = f # CH + xor g, y2 # y2 = f^g # CH + + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + and e, y2 # y2 = (f^g)&e # CH + + rorx $14, e, y1 # y1 = (e >> 14) # S1 + add h, d # d = k + w + h + d # -- + and b, y3 # y3 = (a|c)&b # MAJA + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $39, a, y1 # y1 = a >> 39 # S0A + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + + add y3, h # h = t1 + S0 + MAJ # -- + + RotateState + +################################### RND N + 3 ######################################### + + vpsrlq $19, Y_0, YTMP3 # YTMP3 = W[-2] >> 19 {DC--} + vpsllq $(64-19), Y_0, YTMP1 # YTMP1 = W[-2] << 19 {DC--} + vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {DC--} + vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} + vpsrlq $61, Y_0, YTMP3 # YTMP3 = W[-2] >> 61 {DC--} + vpsllq $(64-61), Y_0, YTMP1 # YTMP1 = W[-2] << 61 {DC--} + vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {DC--} + vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ + # (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} + + # Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] + # to newly calculated sigma1 to get w[18] and w[19] + vpaddq YTMP4, YTMP0, YTMP2 # YTMP2 = {W[3], W[2], --, --} + + # Form w[19, w[18], w17], w[16] + vpblendd $0xF0, YTMP2, Y_0, Y_0 # Y_0 = {W[3], W[2], W[1], W[0]} + + mov a, y3 # y3 = a # MAJA + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + add 3*8+frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + + mov f, y2 # y2 = f # CH + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + xor g, y2 # y2 = f^g # CH + + + rorx $14, e, y1 # y1 = (e >> 14) # S1 + and e, y2 # y2 = (f^g)&e # CH + add h, d # d = k + w + h + d # -- + and b, y3 # y3 = (a|c)&b # MAJA + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + + rorx $39, a, y1 # y1 = a >> 39 # S0A + add y0, y2 # y2 = S1 + CH # -- + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + rorx $28, a, T1 # T1 = (a >> 28) # S0 + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and c, T1 # T1 = a&c # MAJB + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + + add y1, h # h = k + w + h + S0 # -- + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + add y3, h # h = t1 + S0 + MAJ # -- + + RotateState + + rotate_Ys +.endm + +.macro DO_4ROUNDS + +################################### RND N + 0 ######################################### + + mov f, y2 # y2 = f # CH + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + rorx $14, e, y1 # y1 = (e >> 14) # S1 + and e, y2 # y2 = (f^g)&e # CH + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $39, a, y1 # y1 = a >> 39 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + add frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + RotateState + +################################### RND N + 1 ######################################### + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + rorx $14, e, y1 # y1 = (e >> 14) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $39, a, y1 # y1 = a >> 39 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + add 8*1+frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + RotateState + +################################### RND N + 2 ######################################### + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + rorx $14, e, y1 # y1 = (e >> 14) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $39, a, y1 # y1 = a >> 39 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + add 8*2+frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + RotateState + +################################### RND N + 3 ######################################### + + add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + mov f, y2 # y2 = f # CH + rorx $41, e, y0 # y0 = e >> 41 # S1A + rorx $18, e, y1 # y1 = e >> 18 # S1B + xor g, y2 # y2 = f^g # CH + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 + rorx $14, e, y1 # y1 = (e >> 14) # S1 + and e, y2 # y2 = (f^g)&e # CH + add y3, old_h # h = t1 + S0 + MAJ # -- + + xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 + rorx $34, a, T1 # T1 = a >> 34 # S0B + xor g, y2 # y2 = CH = ((f^g)&e)^g # CH + rorx $39, a, y1 # y1 = a >> 39 # S0A + mov a, y3 # y3 = a # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 + rorx $28, a, T1 # T1 = (a >> 28) # S0 + add 8*3+frame_XFER(%rsp), h # h = k + w + h # -- + or c, y3 # y3 = a|c # MAJA + + xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 + mov a, T1 # T1 = a # MAJB + and b, y3 # y3 = (a|c)&b # MAJA + and c, T1 # T1 = a&c # MAJB + add y0, y2 # y2 = S1 + CH # -- + + + add h, d # d = k + w + h + d # -- + or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ + add y1, h # h = k + w + h + S0 # -- + + add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- + + add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- + + add y3, h # h = t1 + S0 + MAJ # -- + + RotateState + +.endm + +######################################################################## +# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks +######################################################################## +SYM_TYPED_FUNC_START(sha512_transform_rorx) + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + # Allocate Stack Space + push %rbp + mov %rsp, %rbp + sub $frame_size, %rsp + and $~(0x20 - 1), %rsp + + shl $7, NUM_BLKS # convert to bytes + jz done_hash + add INP, NUM_BLKS # pointer to end of data + mov NUM_BLKS, frame_INPEND(%rsp) + + ## load initial digest + mov 8*0(CTX1), a + mov 8*1(CTX1), b + mov 8*2(CTX1), c + mov 8*3(CTX1), d + mov 8*4(CTX1), e + mov 8*5(CTX1), f + mov 8*6(CTX1), g + mov 8*7(CTX1), h + + # save %rdi (CTX) before it gets clobbered + mov %rdi, frame_CTX(%rsp) + + vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK + +loop0: + lea K512(%rip), TBL + + ## byte swap first 16 dwords + COPY_YMM_AND_BSWAP Y_0, (INP), BYTE_FLIP_MASK + COPY_YMM_AND_BSWAP Y_1, 1*32(INP), BYTE_FLIP_MASK + COPY_YMM_AND_BSWAP Y_2, 2*32(INP), BYTE_FLIP_MASK + COPY_YMM_AND_BSWAP Y_3, 3*32(INP), BYTE_FLIP_MASK + + mov INP, frame_INP(%rsp) + + ## schedule 64 input dwords, by doing 12 rounds of 4 each + movq $4, frame_SRND(%rsp) + +.align 16 +loop1: + vpaddq (TBL), Y_0, XFER + vmovdqa XFER, frame_XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddq 1*32(TBL), Y_0, XFER + vmovdqa XFER, frame_XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddq 2*32(TBL), Y_0, XFER + vmovdqa XFER, frame_XFER(%rsp) + FOUR_ROUNDS_AND_SCHED + + vpaddq 3*32(TBL), Y_0, XFER + vmovdqa XFER, frame_XFER(%rsp) + add $(4*32), TBL + FOUR_ROUNDS_AND_SCHED + + subq $1, frame_SRND(%rsp) + jne loop1 + + movq $2, frame_SRND(%rsp) +loop2: + vpaddq (TBL), Y_0, XFER + vmovdqa XFER, frame_XFER(%rsp) + DO_4ROUNDS + vpaddq 1*32(TBL), Y_1, XFER + vmovdqa XFER, frame_XFER(%rsp) + add $(2*32), TBL + DO_4ROUNDS + + vmovdqa Y_2, Y_0 + vmovdqa Y_3, Y_1 + + subq $1, frame_SRND(%rsp) + jne loop2 + + mov frame_CTX(%rsp), CTX2 + addm 8*0(CTX2), a + addm 8*1(CTX2), b + addm 8*2(CTX2), c + addm 8*3(CTX2), d + addm 8*4(CTX2), e + addm 8*5(CTX2), f + addm 8*6(CTX2), g + addm 8*7(CTX2), h + + mov frame_INP(%rsp), INP + add $128, INP + cmp frame_INPEND(%rsp), INP + jne loop0 + +done_hash: + + # Restore Stack Pointer + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + + RET +SYM_FUNC_END(sha512_transform_rorx) + +######################################################################## +### Binary Data + + +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 +# K[t] used in SHA512 hashing +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 +# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x08090a0b0c0d0e0f0001020304050607 + .octa 0x18191a1b1c1d1e1f1011121314151617 + +.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32 +.align 32 +MASK_YMM_LO: + .octa 0x00000000000000000000000000000000 + .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S new file mode 100644 index 000000000..c06afb527 --- /dev/null +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -0,0 +1,425 @@ +######################################################################## +# Implement fast SHA-512 with SSSE3 instructions. (x86_64) +# +# Copyright (C) 2013 Intel Corporation. +# +# Authors: +# James Guilford +# Kirk Yap +# David Cote +# Tim Chen +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################## +# +# This code is described in an Intel White-Paper: +# "Fast SHA-512 Implementations on Intel Architecture Processors" +# +# To find it, surf to http://www.intel.com/p/en_US/embedded +# and search for that title. +# +######################################################################## + +#include +#include + +.text + +# Virtual Registers +# ARG1 +digest = %rdi +# ARG2 +msg = %rsi +# ARG3 +msglen = %rdx +T1 = %rcx +T2 = %r8 +a_64 = %r9 +b_64 = %r10 +c_64 = %r11 +d_64 = %r12 +e_64 = %r13 +f_64 = %r14 +g_64 = %r15 +h_64 = %rbx +tmp0 = %rax + +# Local variables (stack frame) + +W_SIZE = 80*8 +WK_SIZE = 2*8 + +frame_W = 0 +frame_WK = frame_W + W_SIZE +frame_size = frame_WK + WK_SIZE + +# Useful QWORD "arrays" for simpler memory references +# MSG, DIGEST, K_t, W_t are arrays +# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even + +# Input message (arg1) +#define MSG(i) 8*i(msg) + +# Output Digest (arg2) +#define DIGEST(i) 8*i(digest) + +# SHA Constants (static mem) +#define K_t(i) 8*i+K512(%rip) + +# Message Schedule (stack frame) +#define W_t(i) 8*i+frame_W(%rsp) + +# W[t]+K[t] (stack frame) +#define WK_2(i) 8*((i%2))+frame_WK(%rsp) + +.macro RotateState + # Rotate symbols a..h right + TMP = h_64 + h_64 = g_64 + g_64 = f_64 + f_64 = e_64 + e_64 = d_64 + d_64 = c_64 + c_64 = b_64 + b_64 = a_64 + a_64 = TMP +.endm + +.macro SHA512_Round rnd + + # Compute Round %%t + mov f_64, T1 # T1 = f + mov e_64, tmp0 # tmp = e + xor g_64, T1 # T1 = f ^ g + ror $23, tmp0 # 41 # tmp = e ror 23 + and e_64, T1 # T1 = (f ^ g) & e + xor e_64, tmp0 # tmp = (e ror 23) ^ e + xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) + idx = \rnd + add WK_2(idx), T1 # W[t] + K[t] from message scheduler + ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4 + xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e + mov a_64, T2 # T2 = a + add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h + ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) + add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) + mov a_64, tmp0 # tmp = a + xor c_64, T2 # T2 = a ^ c + and c_64, tmp0 # tmp = a & c + and b_64, T2 # T2 = (a ^ c) & b + xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) + mov a_64, tmp0 # tmp = a + ror $5, tmp0 # 39 # tmp = a ror 5 + xor a_64, tmp0 # tmp = (a ror 5) ^ a + add T1, d_64 # e(next_state) = d + T1 + ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6 + xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a + lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) + ror $28, tmp0 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) + add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) + RotateState +.endm + +.macro SHA512_2Sched_2Round_sse rnd + + # Compute rounds t-2 and t-1 + # Compute message schedule QWORDS t and t+1 + + # Two rounds are computed based on the values for K[t-2]+W[t-2] and + # K[t-1]+W[t-1] which were previously stored at WK_2 by the message + # scheduler. + # The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. + # They are then added to their respective SHA512 constants at + # [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] + # For brievity, the comments following vectored instructions only refer to + # the first of a pair of QWORDS. + # Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} + # The computation of the message schedule and the rounds are tightly + # stitched to take advantage of instruction-level parallelism. + # For clarity, integer instructions (for the rounds calculation) are indented + # by one tab. Vectored instructions (for the message scheduler) are indented + # by two tabs. + + mov f_64, T1 + idx = \rnd -2 + movdqa W_t(idx), %xmm2 # XMM2 = W[t-2] + xor g_64, T1 + and e_64, T1 + movdqa %xmm2, %xmm0 # XMM0 = W[t-2] + xor g_64, T1 + idx = \rnd + add WK_2(idx), T1 + idx = \rnd - 15 + movdqu W_t(idx), %xmm5 # XMM5 = W[t-15] + mov e_64, tmp0 + ror $23, tmp0 # 41 + movdqa %xmm5, %xmm3 # XMM3 = W[t-15] + xor e_64, tmp0 + ror $4, tmp0 # 18 + psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42 + xor e_64, tmp0 + ror $14, tmp0 # 14 + psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1 + add tmp0, T1 + add h_64, T1 + pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2] + mov a_64, T2 + xor c_64, T2 + pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15] + and b_64, T2 + mov a_64, tmp0 + psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13 + and c_64, tmp0 + xor tmp0, T2 + psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6 + mov a_64, tmp0 + ror $5, tmp0 # 39 + pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] + xor a_64, tmp0 + ror $6, tmp0 # 34 + pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] + xor a_64, tmp0 + ror $28, tmp0 # 28 + psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 + add tmp0, T2 + add T1, d_64 + psrlq $1, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 + lea (T1, T2), h_64 + RotateState + movdqa %xmm2, %xmm1 # XMM1 = W[t-2] + mov f_64, T1 + xor g_64, T1 + movdqa %xmm5, %xmm4 # XMM4 = W[t-15] + and e_64, T1 + xor g_64, T1 + psllq $(64-19)-(64-61) , %xmm1 # XMM1 = W[t-2] << 42 + idx = \rnd + 1 + add WK_2(idx), T1 + mov e_64, tmp0 + psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7 + ror $23, tmp0 # 41 + xor e_64, tmp0 + pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2] + ror $4, tmp0 # 18 + xor e_64, tmp0 + pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15] + ror $14, tmp0 # 14 + add tmp0, T1 + psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3 + add h_64, T1 + mov a_64, T2 + psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56 + xor c_64, T2 + and b_64, T2 + pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + mov a_64, tmp0 + and c_64, tmp0 + idx = \rnd - 7 + movdqu W_t(idx), %xmm1 # XMM1 = W[t-7] + xor tmp0, T2 + pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15]) + mov a_64, tmp0 + paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + ror $5, tmp0 # 39 + idx =\rnd-16 + paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] + xor a_64, tmp0 + paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] + ror $6, tmp0 # 34 + movdqa %xmm0, W_t(\rnd) # Store scheduled qwords + xor a_64, tmp0 + paddq K_t(\rnd), %xmm0 # Compute W[t]+K[t] + ror $28, tmp0 # 28 + idx = \rnd + movdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds + add tmp0, T2 + add T1, d_64 + lea (T1, T2), h_64 + RotateState +.endm + +######################################################################## +## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, +## int blocks); +# (struct sha512_state is assumed to begin with u64 state[8]) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks. +######################################################################## +SYM_TYPED_FUNC_START(sha512_transform_ssse3) + + test msglen, msglen + je nowork + + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + # Allocate Stack Space + push %rbp + mov %rsp, %rbp + sub $frame_size, %rsp + and $~(0x20 - 1), %rsp + +updateblock: + +# Load state variables + mov DIGEST(0), a_64 + mov DIGEST(1), b_64 + mov DIGEST(2), c_64 + mov DIGEST(3), d_64 + mov DIGEST(4), e_64 + mov DIGEST(5), f_64 + mov DIGEST(6), g_64 + mov DIGEST(7), h_64 + + t = 0 + .rept 80/2 + 1 + # (80 rounds) / (2 rounds/iteration) + (1 iteration) + # +1 iteration because the scheduler leads hashing by 1 iteration + .if t < 2 + # BSWAP 2 QWORDS + movdqa XMM_QWORD_BSWAP(%rip), %xmm1 + movdqu MSG(t), %xmm0 + pshufb %xmm1, %xmm0 # BSWAP + movdqa %xmm0, W_t(t) # Store Scheduled Pair + paddq K_t(t), %xmm0 # Compute W[t]+K[t] + movdqa %xmm0, WK_2(t) # Store into WK for rounds + .elseif t < 16 + # BSWAP 2 QWORDS# Compute 2 Rounds + movdqu MSG(t), %xmm0 + pshufb %xmm1, %xmm0 # BSWAP + SHA512_Round t-2 # Round t-2 + movdqa %xmm0, W_t(t) # Store Scheduled Pair + paddq K_t(t), %xmm0 # Compute W[t]+K[t] + SHA512_Round t-1 # Round t-1 + movdqa %xmm0, WK_2(t) # Store W[t]+K[t] into WK + .elseif t < 79 + # Schedule 2 QWORDS# Compute 2 Rounds + SHA512_2Sched_2Round_sse t + .else + # Compute 2 Rounds + SHA512_Round t-2 + SHA512_Round t-1 + .endif + t = t+2 + .endr + + # Update digest + add a_64, DIGEST(0) + add b_64, DIGEST(1) + add c_64, DIGEST(2) + add d_64, DIGEST(3) + add e_64, DIGEST(4) + add f_64, DIGEST(5) + add g_64, DIGEST(6) + add h_64, DIGEST(7) + + # Advance to next message block + add $16*8, msg + dec msglen + jnz updateblock + + # Restore Stack Pointer + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + +nowork: + RET +SYM_FUNC_END(sha512_transform_ssse3) + +######################################################################## +### Binary Data + +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 +.align 16 +# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. +XMM_QWORD_BSWAP: + .octa 0x08090a0b0c0d0e0f0001020304050607 + +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 +# K[t] used in SHA512 hashing +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c new file mode 100644 index 000000000..6d3b85e53 --- /dev/null +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -0,0 +1,347 @@ +/* + * Cryptographic API. + * + * Glue code for the SHA512 Secure Hash Algorithm assembler + * implementation using supplemental SSE3 / AVX / AVX2 instructions. + * + * This file is based on sha512_generic.c + * + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void sha512_transform_ssse3(struct sha512_state *state, + const u8 *data, int blocks); + +static int sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha512_block_fn *sha512_xform) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) + return crypto_sha512_update(desc, data, len); + + /* + * Make sure struct sha512_state begins directly with the SHA512 + * 512-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); + + kernel_fpu_begin(); + sha512_base_do_update(desc, data, len, sha512_xform); + kernel_fpu_end(); + + return 0; +} + +static int sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha512_block_fn *sha512_xform) +{ + if (!crypto_simd_usable()) + return crypto_sha512_finup(desc, data, len, out); + + kernel_fpu_begin(); + if (len) + sha512_base_do_update(desc, data, len, sha512_xform); + sha512_base_do_finalize(desc, sha512_xform); + kernel_fpu_end(); + + return sha512_base_finish(desc, out); +} + +static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_ssse3); +} + +static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_ssse3); +} + +/* Add padding and return the message digest. */ +static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) +{ + return sha512_ssse3_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_ssse3_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_ssse3_update, + .final = sha512_ssse3_final, + .finup = sha512_ssse3_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-ssse3", + .cra_priority = 150, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_ssse3_update, + .final = sha512_ssse3_final, + .finup = sha512_ssse3_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-ssse3", + .cra_priority = 150, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int register_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); + return 0; +} + +static void unregister_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); +} + +asmlinkage void sha512_transform_avx(struct sha512_state *state, + const u8 *data, int blocks); +static bool avx_usable(void) +{ + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + if (boot_cpu_has(X86_FEATURE_AVX)) + pr_info("AVX detected but unusable.\n"); + return false; + } + + return true; +} + +static int sha512_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_avx); +} + +static int sha512_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_avx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx", + .cra_priority = 160, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx", + .cra_priority = 160, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int register_sha512_avx(void) +{ + if (avx_usable()) + return crypto_register_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); + return 0; +} + +static void unregister_sha512_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); +} + +asmlinkage void sha512_transform_rorx(struct sha512_state *state, + const u8 *data, int blocks); + +static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_rorx); +} + +static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_rorx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx2_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx2", + .cra_priority = 170, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx2", + .cra_priority = 170, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha512_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); + return 0; +} +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); + +static void unregister_sha512_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); +} + +static int __init sha512_ssse3_mod_init(void) +{ + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; + + if (register_sha512_ssse3()) + goto fail; + + if (register_sha512_avx()) { + unregister_sha512_ssse3(); + goto fail; + } + + if (register_sha512_avx2()) { + unregister_sha512_avx(); + unregister_sha512_ssse3(); + goto fail; + } + + return 0; +fail: + return -ENODEV; +} + +static void __exit sha512_ssse3_mod_fini(void) +{ + unregister_sha512_avx2(); + unregister_sha512_avx(); + unregister_sha512_ssse3(); +} + +module_init(sha512_ssse3_mod_init); +module_exit(sha512_ssse3_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); + +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha512-ssse3"); +MODULE_ALIAS_CRYPTO("sha512-avx"); +MODULE_ALIAS_CRYPTO("sha512-avx2"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha384-ssse3"); +MODULE_ALIAS_CRYPTO("sha384-avx"); +MODULE_ALIAS_CRYPTO("sha384-avx2"); diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S new file mode 100644 index 000000000..8fc5ac681 --- /dev/null +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -0,0 +1,518 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 AVX accelerated transform. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Jussi Kivilinna + * Copyright (C) 2021 Tianjia Zhang + */ + +/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at: + * https://gnupg.org/software/libgcrypt/index.html + */ + +#include +#include +#include + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Constants */ + +/* Round constant macros */ + +#define K0 2043430169 /* 0x79cc4519 */ +#define K1 -208106958 /* 0xf3988a32 */ +#define K2 -416213915 /* 0xe7311465 */ +#define K3 -832427829 /* 0xce6228cb */ +#define K4 -1664855657 /* 0x9cc45197 */ +#define K5 965255983 /* 0x3988a32f */ +#define K6 1930511966 /* 0x7311465e */ +#define K7 -433943364 /* 0xe6228cbc */ +#define K8 -867886727 /* 0xcc451979 */ +#define K9 -1735773453 /* 0x988a32f3 */ +#define K10 823420391 /* 0x311465e7 */ +#define K11 1646840782 /* 0x6228cbce */ +#define K12 -1001285732 /* 0xc451979c */ +#define K13 -2002571463 /* 0x88a32f39 */ +#define K14 289824371 /* 0x11465e73 */ +#define K15 579648742 /* 0x228cbce6 */ +#define K16 -1651869049 /* 0x9d8a7a87 */ +#define K17 991229199 /* 0x3b14f50f */ +#define K18 1982458398 /* 0x7629ea1e */ +#define K19 -330050500 /* 0xec53d43c */ +#define K20 -660100999 /* 0xd8a7a879 */ +#define K21 -1320201997 /* 0xb14f50f3 */ +#define K22 1654563303 /* 0x629ea1e7 */ +#define K23 -985840690 /* 0xc53d43ce */ +#define K24 -1971681379 /* 0x8a7a879d */ +#define K25 351604539 /* 0x14f50f3b */ +#define K26 703209078 /* 0x29ea1e76 */ +#define K27 1406418156 /* 0x53d43cec */ +#define K28 -1482130984 /* 0xa7a879d8 */ +#define K29 1330705329 /* 0x4f50f3b1 */ +#define K30 -1633556638 /* 0x9ea1e762 */ +#define K31 1027854021 /* 0x3d43cec5 */ +#define K32 2055708042 /* 0x7a879d8a */ +#define K33 -183551212 /* 0xf50f3b14 */ +#define K34 -367102423 /* 0xea1e7629 */ +#define K35 -734204845 /* 0xd43cec53 */ +#define K36 -1468409689 /* 0xa879d8a7 */ +#define K37 1358147919 /* 0x50f3b14f */ +#define K38 -1578671458 /* 0xa1e7629e */ +#define K39 1137624381 /* 0x43cec53d */ +#define K40 -2019718534 /* 0x879d8a7a */ +#define K41 255530229 /* 0x0f3b14f5 */ +#define K42 511060458 /* 0x1e7629ea */ +#define K43 1022120916 /* 0x3cec53d4 */ +#define K44 2044241832 /* 0x79d8a7a8 */ +#define K45 -206483632 /* 0xf3b14f50 */ +#define K46 -412967263 /* 0xe7629ea1 */ +#define K47 -825934525 /* 0xcec53d43 */ +#define K48 -1651869049 /* 0x9d8a7a87 */ +#define K49 991229199 /* 0x3b14f50f */ +#define K50 1982458398 /* 0x7629ea1e */ +#define K51 -330050500 /* 0xec53d43c */ +#define K52 -660100999 /* 0xd8a7a879 */ +#define K53 -1320201997 /* 0xb14f50f3 */ +#define K54 1654563303 /* 0x629ea1e7 */ +#define K55 -985840690 /* 0xc53d43ce */ +#define K56 -1971681379 /* 0x8a7a879d */ +#define K57 351604539 /* 0x14f50f3b */ +#define K58 703209078 /* 0x29ea1e76 */ +#define K59 1406418156 /* 0x53d43cec */ +#define K60 -1482130984 /* 0xa7a879d8 */ +#define K61 1330705329 /* 0x4f50f3b1 */ +#define K62 -1633556638 /* 0x9ea1e762 */ +#define K63 1027854021 /* 0x3d43cec5 */ + +/* Register macros */ + +#define RSTATE %rdi +#define RDATA %rsi +#define RNBLKS %rdx + +#define t0 %eax +#define t1 %ebx +#define t2 %ecx + +#define a %r8d +#define b %r9d +#define c %r10d +#define d %r11d +#define e %r12d +#define f %r13d +#define g %r14d +#define h %r15d + +#define W0 %xmm0 +#define W1 %xmm1 +#define W2 %xmm2 +#define W3 %xmm3 +#define W4 %xmm4 +#define W5 %xmm5 + +#define XTMP0 %xmm6 +#define XTMP1 %xmm7 +#define XTMP2 %xmm8 +#define XTMP3 %xmm9 +#define XTMP4 %xmm10 +#define XTMP5 %xmm11 +#define XTMP6 %xmm12 + +#define BSWAP_REG %xmm15 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) +#define STACK_REG_SAVE_SIZE (64) + +#define STACK_W (0) +#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE) +#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE) + +/* Instruction helpers. */ + +#define roll2(v, reg) \ + roll $(v), reg; + +#define roll3mov(v, src, dst) \ + movl src, dst; \ + roll $(v), dst; + +#define roll3(v, src, dst) \ + rorxl $(32-(v)), src, dst; + +#define addl2(a, out) \ + leal (a, out), out; + +/* Round function macros. */ + +#define GG1(x, y, z, o, t) \ + movl x, o; \ + xorl y, o; \ + xorl z, o; + +#define FF1(x, y, z, o, t) GG1(x, y, z, o, t) + +#define GG2(x, y, z, o, t) \ + andnl z, x, o; \ + movl y, t; \ + andl x, t; \ + addl2(t, o); + +#define FF2(x, y, z, o, t) \ + movl y, o; \ + xorl x, o; \ + movl y, t; \ + andl x, t; \ + andl z, o; \ + xorl t, o; + +#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \ + /* rol(a, 12) => t0 */ \ + roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \ + /* rol (t0 + e + t), 7) => t1 */ \ + leal K##round(t0, e, 1), t1; \ + roll2(7, t1); \ + /* h + w1 => h */ \ + addl wtype##_W1_ADDR(round, widx), h; \ + /* h + t1 => h */ \ + addl2(t1, h); \ + /* t1 ^ t0 => t0 */ \ + xorl t1, t0; \ + /* w1w2 + d => d */ \ + addl wtype##_W1W2_ADDR(round, widx), d; \ + /* FF##i(a,b,c) => t1 */ \ + FF##i(a, b, c, t1, t2); \ + /* d + t1 => d */ \ + addl2(t1, d); \ + /* GG#i(e,f,g) => t2 */ \ + GG##i(e, f, g, t2, t1); \ + /* h + t2 => h */ \ + addl2(t2, h); \ + /* rol (f, 19) => f */ \ + roll2(19, f); \ + /* d + t0 => d */ \ + addl2(t0, d); \ + /* rol (b, 9) => b */ \ + roll2(9, b); \ + /* P0(h) => h */ \ + roll3(9, h, t2); \ + roll3(17, h, t1); \ + xorl t2, h; \ + xorl t1, h; + +#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(1, a, b, c, d, e, f, g, h, round, widx, wtype) + +#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(2, a, b, c, d, e, f, g, h, round, widx, wtype) + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Expanded input address. */ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Rounds 1-12, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32) + +/* Rounds 1-12, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32) + +/* Input block loading. */ +#define LOAD_W_XMM_1() \ + vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \ + vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \ + vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \ + vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \ + vpshufb BSWAP_REG, XTMP0, XTMP0; \ + vpshufb BSWAP_REG, XTMP1, XTMP1; \ + vpshufb BSWAP_REG, XTMP2, XTMP2; \ + vpshufb BSWAP_REG, XTMP3, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP4; \ + vpxor XTMP1, XTMP2, XTMP5; \ + vpxor XTMP2, XTMP3, XTMP6; \ + leaq 64(RDATA), RDATA; \ + vmovdqa XTMP0, IW_W1_ADDR(0, 0); \ + vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \ + vmovdqa XTMP1, IW_W1_ADDR(4, 0); \ + vmovdqa XTMP5, IW_W1W2_ADDR(4, 0); + +#define LOAD_W_XMM_2() \ + vmovdqa XTMP2, IW_W1_ADDR(8, 0); \ + vmovdqa XTMP6, IW_W1W2_ADDR(8, 0); + +#define LOAD_W_XMM_3() \ + vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \ + vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \ + vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \ + vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \ + vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \ + vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */ + +/* Message scheduling. Note: 3 words per XMM register. */ +#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + vpshufd $0b10111111, w0, XTMP0; \ + vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \ + /* Load (w[i - 13]) => XTMP1 */ \ + vpshufd $0b10111111, w1, XTMP1; \ + vpalignr $12, XTMP1, w2, XTMP1; \ + /* w[i - 9] == w3 */ \ + /* XMM3 ^ XTMP0 => XTMP0 */ \ + vpxor w3, XTMP0, XTMP0; + +#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ + vpslld $15, w5, XTMP2; \ + vpsrld $(32-15), w5, XTMP3; \ + vpxor XTMP2, XTMP3, XTMP3; \ + vpxor XTMP3, XTMP0, XTMP0; \ + /* rol(XTMP1, 7) => XTMP1 */ \ + vpslld $7, XTMP1, XTMP5; \ + vpsrld $(32-7), XTMP1, XTMP1; \ + vpxor XTMP5, XTMP1, XTMP1; \ + /* XMM4 ^ XTMP1 => XTMP1 */ \ + vpxor w4, XTMP1, XTMP1; \ + /* w[i - 6] == XMM4 */ \ + /* P1(XTMP0) ^ XTMP1 => XMM0 */ \ + vpslld $15, XTMP0, XTMP5; \ + vpsrld $(32-15), XTMP0, XTMP6; \ + vpslld $23, XTMP0, XTMP2; \ + vpsrld $(32-23), XTMP0, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP1; \ + vpxor XTMP6, XTMP5, XTMP5; \ + vpxor XTMP3, XTMP2, XTMP2; \ + vpxor XTMP2, XTMP5, XTMP5; \ + vpxor XTMP5, XTMP1, w0; + +#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \ + /* W1 in XMM12 */ \ + vpshufd $0b10111111, w4, XTMP4; \ + vpalignr $12, XTMP4, w5, XTMP4; \ + vmovdqa XTMP4, XW_W1_ADDR((round), 0); \ + /* W1 ^ W2 => XTMP1 */ \ + vpxor w0, XTMP4, XTMP1; \ + vmovdqa XTMP1, XW_W1W2_ADDR((round), 0); + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +.Lbe32mask: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + +.text + +/* + * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA. + * + * void sm3_transform_avx(struct sm3_state *state, + * const u8 *data, int nblocks); + */ +.align 16 +SYM_TYPED_FUNC_START(sm3_transform_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblocks + */ + vzeroupper; + + pushq %rbp; + movq %rsp, %rbp; + + movq %rdx, RNBLKS; + + subq $STACK_SIZE, %rsp; + andq $(~63), %rsp; + + movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp); + movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp); + movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp); + movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp); + movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp); + + vmovdqa .Lbe32mask (%rip), BSWAP_REG; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + movl state_h5(RSTATE), f; + movl state_h6(RSTATE), g; + movl state_h7(RSTATE), h; + +.align 16 +.Loop: + /* Load data part1. */ + LOAD_W_XMM_1(); + + leaq -1(RNBLKS), RNBLKS; + + /* Transform 0-3 + Load data part2. */ + R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2(); + R1(d, a, b, c, h, e, f, g, 1, 1, IW); + R1(c, d, a, b, g, h, e, f, 2, 2, IW); + R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3(); + + /* Transform 4-7 + Precalc 12-14. */ + R1(a, b, c, d, e, f, g, h, 4, 0, IW); + R1(d, a, b, c, h, e, f, g, 5, 1, IW); + R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5); + R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5); + + /* Transform 8-11 + Precalc 12-17. */ + R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5); + R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0); + R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0); + R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0); + + /* Transform 12-14 + Precalc 18-20 */ + R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1); + R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1); + R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1); + + /* Transform 15-17 + Precalc 21-23 */ + R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2); + + /* Transform 18-20 + Precalc 24-26 */ + R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3); + + /* Transform 21-23 + Precalc 27-29 */ + R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4); + R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4); + + /* Transform 24-26 + Precalc 30-32 */ + R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5); + R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5); + R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5); + + /* Transform 27-29 + Precalc 33-35 */ + R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0); + R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0); + R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0); + + /* Transform 30-32 + Precalc 36-38 */ + R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1); + R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1); + R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1); + + /* Transform 33-35 + Precalc 39-41 */ + R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2); + R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2); + R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2); + + /* Transform 36-38 + Precalc 42-44 */ + R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3); + R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3); + R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3); + + /* Transform 39-41 + Precalc 45-47 */ + R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4); + R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4); + R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4); + + /* Transform 42-44 + Precalc 48-50 */ + R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5); + R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5); + R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5); + + /* Transform 45-47 + Precalc 51-53 */ + R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0); + R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0); + R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0); + + /* Transform 48-50 + Precalc 54-56 */ + R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1); + R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1); + R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1); + + /* Transform 51-53 + Precalc 57-59 */ + R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2); + + /* Transform 54-56 + Precalc 60-62 */ + R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3); + + /* Transform 57-59 + Precalc 63 */ + R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 58, 1, XW); + R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4); + + /* Transform 60-62 + Precalc 63 */ + R2(a, b, c, d, e, f, g, h, 60, 0, XW); + R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 62, 2, XW); + + /* Transform 63 */ + R2(b, c, d, a, f, g, h, e, 63, 0, XW); + + /* Update the chaining variables. */ + xorl state_h0(RSTATE), a; + xorl state_h1(RSTATE), b; + xorl state_h2(RSTATE), c; + xorl state_h3(RSTATE), d; + movl a, state_h0(RSTATE); + movl b, state_h1(RSTATE); + movl c, state_h2(RSTATE); + movl d, state_h3(RSTATE); + xorl state_h4(RSTATE), e; + xorl state_h5(RSTATE), f; + xorl state_h6(RSTATE), g; + xorl state_h7(RSTATE), h; + movl e, state_h4(RSTATE); + movl f, state_h5(RSTATE); + movl g, state_h6(RSTATE); + movl h, state_h7(RSTATE); + + cmpq $0, RNBLKS; + jne .Loop; + + vzeroall; + + movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx; + movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15; + movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14; + movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13; + movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12; + + vmovdqa %xmm0, IW_W1_ADDR(0, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(0, 0); + vmovdqa %xmm0, IW_W1_ADDR(4, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(4, 0); + vmovdqa %xmm0, IW_W1_ADDR(8, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(8, 0); + + movq %rbp, %rsp; + popq %rbp; + RET; +SYM_FUNC_END(sm3_transform_avx) diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c new file mode 100644 index 000000000..661b6f22f --- /dev/null +++ b/arch/x86/crypto/sm3_avx_glue.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 Secure Hash Algorithm, AVX assembler accelerated. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Tianjia Zhang + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void sm3_transform_avx(struct sm3_state *state, + const u8 *data, int nblocks); + +static int sm3_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) { + sm3_update(sctx, data, len); + return 0; + } + + /* + * Make sure struct sm3_state begins directly with the SM3 + * 256-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0); + + kernel_fpu_begin(); + sm3_base_do_update(desc, data, len, sm3_transform_avx); + kernel_fpu_end(); + + return 0; +} + +static int sm3_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + + sm3_final(sctx, out); + return 0; + } + + kernel_fpu_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_transform_avx); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_avx_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_fpu_begin(); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_avx_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_avx_update, + .final = sm3_avx_final, + .finup = sm3_avx_finup, + .descsize = sizeof(struct sm3_state), + .base = { + .cra_name = "sm3", + .cra_driver_name = "sm3-avx", + .cra_priority = 300, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sm3_avx_mod_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX)) { + pr_info("AVX instruction are not detected.\n"); + return -ENODEV; + } + + if (!boot_cpu_has(X86_FEATURE_BMI2)) { + pr_info("BMI2 instruction are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return crypto_register_shash(&sm3_avx_alg); +} + +static void __exit sm3_avx_mod_exit(void) +{ + crypto_unregister_shash(&sm3_avx_alg); +} + +module_init(sm3_avx_mod_init); +module_exit(sm3_avx_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated"); +MODULE_ALIAS_CRYPTO("sm3"); +MODULE_ALIAS_CRYPTO("sm3-avx"); diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S new file mode 100644 index 000000000..22b6560eb --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -0,0 +1,595 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include +#include + +#define rRIP (%rip) + +#define RX0 %xmm0 +#define RX1 %xmm1 +#define MASK_4BIT %xmm2 +#define RTMP0 %xmm3 +#define RTMP1 %xmm4 +#define RTMP2 %xmm5 +#define RTMP3 %xmm6 +#define RTMP4 %xmm7 + +#define RA0 %xmm8 +#define RA1 %xmm9 +#define RA2 %xmm10 +#define RA3 %xmm11 + +#define RB0 %xmm12 +#define RB1 %xmm13 +#define RB2 %xmm14 +#define RB3 %xmm15 + +#define RNOT %xmm0 +#define RBSWAP %xmm1 + + +/* Transpose four 32-bit words between 128-bit vectors. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. + */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + + +.text +.align 16 + +/* + * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt4) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..4 blocks) + * %rdx: src (1..4 blocks) + * %rcx: num blocks (1..4) + */ + FRAME_BEGIN + + vmovdqu 0*16(%rdx), RA0; + vmovdqa RA0, RA1; + vmovdqa RA0, RA2; + vmovdqa RA0, RA3; + cmpq $2, %rcx; + jb .Lblk4_load_input_done; + vmovdqu 1*16(%rdx), RA1; + je .Lblk4_load_input_done; + vmovdqu 2*16(%rdx), RA2; + cmpq $3, %rcx; + je .Lblk4_load_input_done; + vmovdqu 3*16(%rdx), RA3; + +.Lblk4_load_input_done: + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; + vmovdqa .Lpre_tf_hi_s rRIP, RB0; + vmovdqa .Lpost_tf_lo_s rRIP, RB1; + vmovdqa .Lpost_tf_hi_s rRIP, RB2; + vmovdqa .Linv_shift_row rRIP, RB3; + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2; + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \ + vaesenclast MASK_4BIT, RX0, RX0; \ + transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RB3, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP2, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP3, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk4: + ROUND(0, RA0, RA1, RA2, RA3); + ROUND(1, RA1, RA2, RA3, RA0); + ROUND(2, RA2, RA3, RA0, RA1); + ROUND(3, RA3, RA0, RA1, RA2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk4; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vmovdqu RA0, 0*16(%rsi); + cmpq $2, %rcx; + jb .Lblk4_store_output_done; + vmovdqu RA1, 1*16(%rsi); + je .Lblk4_store_output_done; + vmovdqu RA2, 2*16(%rsi); + cmpq $3, %rcx; + je .Lblk4_store_output_done; + vmovdqu RA3, 3*16(%rsi); + +.Lblk4_store_output_done: + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx_crypt4) + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \ + vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \ + vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vmovdqa .Linv_shift_row rRIP, RTMP4; \ + vaesenclast MASK_4BIT, RX0, RX0; \ + vaesenclast MASK_4BIT, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + RET; +SYM_FUNC_END(__sm4_crypt_blk8) + +/* + * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..8 blocks) + * %rdx: src (1..8 blocks) + * %rcx: num blocks (1..8) + */ + cmpq $5, %rcx; + jb sm4_aesni_avx_crypt4; + + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqa RB0, RB1; + vmovdqa RB0, RB2; + vmovdqa RB0, RB3; + je .Lblk8_load_input_done; + vmovdqu (5 * 16)(%rdx), RB1; + cmpq $7, %rcx; + jb .Lblk8_load_input_done; + vmovdqu (6 * 16)(%rdx), RB2; + je .Lblk8_load_input_done; + vmovdqu (7 * 16)(%rdx), RB3; + +.Lblk8_load_input_done: + call __sm4_crypt_blk8; + + cmpq $6, %rcx; + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + jb .Lblk8_store_output_done; + vmovdqu RB1, (5 * 16)(%rsi); + je .Lblk8_store_output_done; + vmovdqu RB2, (6 * 16)(%rsi); + cmpq $7, %rcx; + je .Lblk8_store_output_done; + vmovdqu RB3, (7 * 16)(%rsi); + +.Lblk8_store_output_done: + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx_crypt8) + +/* + * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + /* load IV and byteswap */ + vmovdqu (%rcx), RA0; + + vmovdqa .Lbswap128_mask rRIP, RBSWAP; + vpshufb RBSWAP, RA0, RTMP0; /* be => le */ + + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP2); /* +1 */ + vpshufb RBSWAP, RTMP0, RA1; + inc_le128(RTMP0, RNOT, RTMP2); /* +2 */ + vpshufb RBSWAP, RTMP0, RA2; + inc_le128(RTMP0, RNOT, RTMP2); /* +3 */ + vpshufb RBSWAP, RTMP0, RA3; + inc_le128(RTMP0, RNOT, RTMP2); /* +4 */ + vpshufb RBSWAP, RTMP0, RB0; + inc_le128(RTMP0, RNOT, RTMP2); /* +5 */ + vpshufb RBSWAP, RTMP0, RB1; + inc_le128(RTMP0, RNOT, RTMP2); /* +6 */ + vpshufb RBSWAP, RTMP0, RB2; + inc_le128(RTMP0, RNOT, RTMP2); /* +7 */ + vpshufb RBSWAP, RTMP0, RB3; + inc_le128(RTMP0, RNOT, RTMP2); /* +8 */ + vpshufb RBSWAP, RTMP0, RTMP1; + + /* store new IV */ + vmovdqu RTMP1, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) + +/* + * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqu (5 * 16)(%rdx), RB1; + vmovdqu (6 * 16)(%rdx), RB2; + vmovdqu (7 * 16)(%rdx), RB3; + + call __sm4_crypt_blk8; + + vmovdqu (7 * 16)(%rdx), RNOT; + vpxor (%rcx), RA0, RA0; + vpxor (0 * 16)(%rdx), RA1, RA1; + vpxor (1 * 16)(%rdx), RA2, RA2; + vpxor (2 * 16)(%rdx), RA3, RA3; + vpxor (3 * 16)(%rdx), RB0, RB0; + vpxor (4 * 16)(%rdx), RB1, RB1; + vpxor (5 * 16)(%rdx), RB2, RB2; + vpxor (6 * 16)(%rdx), RB3, RB3; + vmovdqu RNOT, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) + +/* + * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + /* Load input */ + vmovdqu (%rcx), RA0; + vmovdqu 0 * 16(%rdx), RA1; + vmovdqu 1 * 16(%rdx), RA2; + vmovdqu 2 * 16(%rdx), RA3; + vmovdqu 3 * 16(%rdx), RB0; + vmovdqu 4 * 16(%rdx), RB1; + vmovdqu 5 * 16(%rdx), RB2; + vmovdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + vmovdqu 7 * 16(%rdx), RNOT; + vmovdqu RNOT, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S new file mode 100644 index 000000000..23ee39a8a --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include +#include + +#define rRIP (%rip) + +/* vector registers */ +#define RX0 %ymm0 +#define RX1 %ymm1 +#define MASK_4BIT %ymm2 +#define RTMP0 %ymm3 +#define RTMP1 %ymm4 +#define RTMP2 %ymm5 +#define RTMP3 %ymm6 +#define RTMP4 %ymm7 + +#define RA0 %ymm8 +#define RA1 %ymm9 +#define RA2 %ymm10 +#define RA3 %ymm11 + +#define RB0 %ymm12 +#define RB1 %ymm13 +#define RB2 %ymm14 +#define RB3 %ymm15 + +#define RNOT %ymm0 +#define RBSWAP %ymm1 + +#define RX0x %xmm0 +#define RX1x %xmm1 +#define MASK_4BITx %xmm2 + +#define RNOTx %xmm0 +#define RBSWAPx %xmm1 + +#define RTMP0x %xmm3 +#define RTMP1x %xmm4 +#define RTMP2x %xmm5 +#define RTMP3x %xmm6 +#define RTMP4x %xmm7 + + +/* helper macros */ + +/* Transpose four 32-bit words between 128-bit vector lanes. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* post-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + +.text +.align 16 + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vbroadcasti128 .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vpbroadcastd (4*(round))(%rdi), RX0; \ + vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \ + vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \ + vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vextracti128 $1, RX0, RTMP4x; \ + vextracti128 $1, RX1, RTMP0x; \ + vaesenclast MASK_4BITx, RX0x, RX0x; \ + vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \ + vaesenclast MASK_4BITx, RX1x, RX1x; \ + vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \ + vinserti128 $1, RTMP4x, RX0, RX0; \ + vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \ + vinserti128 $1, RTMP0x, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + RET; +SYM_FUNC_END(__sm4_crypt_blk16) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +/* + * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) + +/* + * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __sm4_crypt_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) + +/* + * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + RET; +SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h new file mode 100644 index 000000000..1bceab751 --- /dev/null +++ b/arch/x86/crypto/sm4-avx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_SM4_AVX_H +#define ASM_X86_SM4_AVX_H + +#include +#include + +typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); + +int sm4_avx_ecb_encrypt(struct skcipher_request *req); +int sm4_avx_ecb_decrypt(struct skcipher_request *req); + +int sm4_cbc_encrypt(struct skcipher_request *req); +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_cfb_encrypt(struct skcipher_request *req); +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +#endif diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c new file mode 100644 index 000000000..84bc718f4 --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16) + +asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cbc_dec_blk16); +} + + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cfb_dec_blk16); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_ctr_enc_blk16); +} + +static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx2"); diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c new file mode 100644 index 000000000..7800f77d6 --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) + +asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_crypt8(rkey, dst, src, 8); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + while (nbytes >= SM4_BLOCK_SIZE) { + unsigned int nblocks = min(nbytes >> 4, 4u); + sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +int sm4_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_enc); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); + +int sm4_avx_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_dec); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); + +int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); + +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_dec, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, + src, nblocks); + + src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; + dst += (nblocks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblocks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += (nblocks + 1) * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cbc_dec_blk8); +} + +int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cfb_encrypt); + +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblocks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblocks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt); + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cfb_dec_blk8); +} + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + for (i = 0; i < nblocks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + + sm4_crypt_block(ctx->rkey_enc, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_ctr_enc_blk8); +} + +static struct skcipher_alg sm4_aesni_avx_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx"); diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 000000000..31f9b2ec3 --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -0,0 +1,376 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2012-2013 Jussi Kivilinna + */ + +#include +#include +#include "glue_helper-asm-avx.S" + +.file "twofish-avx-x86_64-asm_64.S" + +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 8-way AVX twofish + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX0 %xmm8 +#define RY0 %xmm9 + +#define RX1 %xmm10 +#define RY1 %xmm11 + +#define RK1 %xmm12 +#define RK2 %xmm13 + +#define RT %xmm14 +#define RR %xmm15 + +#define RID1 %r13 +#define RID1d %r13d +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RGS1 %r8 +#define RGS1d %r8d +#define RGS2 %r9 +#define RGS2d %r9d +#define RGS3 %r10 +#define RGS3d %r10d + + +#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \ + movzbl src ## bl, RID1d; \ + movzbl src ## bh, RID2d; \ + shrq $16, src; \ + movl t0(CTX, RID1, 4), dst ## d; \ + movl t1(CTX, RID2, 4), RID2d; \ + movzbl src ## bl, RID1d; \ + xorl RID2d, dst ## d; \ + movzbl src ## bh, RID2d; \ + interleave_op(il_reg); \ + xorl t2(CTX, RID1, 4), dst ## d; \ + xorl t3(CTX, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define G(gi1, gi2, x, t0, t1, t2, t3) \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1); \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2); \ + \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none); \ + shlq $32, RGS1; \ + orq RGS1, RGS3; + +#define round_head_2(a, b, x1, y1, x2, y2) \ + vmovq b ## 1, RGI3; \ + vpextrq $1, b ## 1, RGI4; \ + \ + G(RGI1, RGI2, x1, s0, s1, s2, s3); \ + vmovq a ## 2, RGI1; \ + vpextrq $1, a ## 2, RGI2; \ + vmovq RGS2, x1; \ + vpinsrq $1, RGS3, x1, x1; \ + \ + G(RGI3, RGI4, y1, s1, s2, s3, s0); \ + vmovq b ## 2, RGI3; \ + vpextrq $1, b ## 2, RGI4; \ + vmovq RGS2, y1; \ + vpinsrq $1, RGS3, y1, y1; \ + \ + G(RGI1, RGI2, x2, s0, s1, s2, s3); \ + vmovq RGS2, x2; \ + vpinsrq $1, RGS3, x2, x2; \ + \ + G(RGI3, RGI4, y2, s1, s2, s3, s0); \ + vmovq RGS2, y2; \ + vpinsrq $1, RGS3, y2, y2; + +#define encround_tail(a, b, c, d, x, y, prerotate) \ + vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(b); \ + vpxor RT, c, c; \ + vpaddd y, x, y; \ + vpaddd y, RK2, y; \ + vpsrld $1, c, RT; \ + vpslld $(32 - 1), c, c; \ + vpor c, RT, c; \ + vpxor d, y, d; \ + +#define decround_tail(a, b, c, d, x, y, prerotate) \ + vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(a); \ + vpxor RT, c, c; \ + vpaddd y, x, y; \ + vpaddd y, RK2, y; \ + vpxor d, y, d; \ + vpsrld $1, d, y; \ + vpslld $(32 - 1), d, d; \ + vpor d, y, d; \ + +#define rotate_1l(x) \ + vpslld $1, x, RR; \ + vpsrld $(32 - 1), x, x; \ + vpor x, RR, x; + +#define preload_rgi(c) \ + vmovq c, RGI1; \ + vpextrq $1, c, RGI2; + +#define encrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); + +#define decrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); + +#define encrypt_cycle(n) \ + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); + +#define encrypt_cycle_last(n) \ + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy); + +#define decrypt_cycle(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); + +#define decrypt_cycle_last(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy); + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \ + vpxor x0, wkey, x0; \ + vpxor x1, wkey, x1; \ + vpxor x2, wkey, x2; \ + vpxor x3, wkey, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpxor x0, wkey, x0; \ + vpxor x1, wkey, x1; \ + vpxor x2, wkey, x2; \ + vpxor x3, wkey, x3; + +.align 8 +SYM_FUNC_START_LOCAL(__twofish_enc_blk8) + /* input: + * %rdi: ctx, CTX + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks + * output: + * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks + */ + + vmovdqu w(CTX), RK1; + + pushq %r13; + pushq %rbx; + pushq %rcx; + + inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + preload_rgi(RA1); + rotate_1l(RD1); + inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + rotate_1l(RD2); + + encrypt_cycle(0); + encrypt_cycle(1); + encrypt_cycle(2); + encrypt_cycle(3); + encrypt_cycle(4); + encrypt_cycle(5); + encrypt_cycle(6); + encrypt_cycle_last(7); + + vmovdqu (w+4*4)(CTX), RK1; + + popq %rcx; + popq %rbx; + popq %r13; + + outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + + RET; +SYM_FUNC_END(__twofish_enc_blk8) + +.align 8 +SYM_FUNC_START_LOCAL(__twofish_dec_blk8) + /* input: + * %rdi: ctx, CTX + * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks + */ + + vmovdqu (w+4*4)(CTX), RK1; + + pushq %r13; + pushq %rbx; + + inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + preload_rgi(RC1); + rotate_1l(RA1); + inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + rotate_1l(RA2); + + decrypt_cycle(7); + decrypt_cycle(6); + decrypt_cycle(5); + decrypt_cycle(4); + decrypt_cycle(3); + decrypt_cycle(2); + decrypt_cycle(1); + decrypt_cycle_last(0); + + vmovdqu (w)(CTX), RK1; + + popq %rbx; + popq %r13; + + outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + + RET; +SYM_FUNC_END(__twofish_dec_blk8) + +SYM_FUNC_START(twofish_ecb_enc_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + movq %rsi, %r11; + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __twofish_enc_blk8; + + store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + FRAME_END + RET; +SYM_FUNC_END(twofish_ecb_enc_8way) + +SYM_FUNC_START(twofish_ecb_dec_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + movq %rsi, %r11; + + load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + call __twofish_dec_blk8; + + store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + FRAME_END + RET; +SYM_FUNC_END(twofish_ecb_dec_8way) + +SYM_FUNC_START(twofish_cbc_dec_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + FRAME_BEGIN + + pushq %r12; + + movq %rsi, %r11; + movq %rdx, %r12; + + load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + call __twofish_dec_blk8; + + store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + popq %r12; + + FRAME_END + RET; +SYM_FUNC_END(twofish_cbc_dec_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 000000000..3abcad661 --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include +#include + +/* return address at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define ctx 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +SYM_FUNC_START(twofish_enc_blk) + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + RET +SYM_FUNC_END(twofish_enc_blk) + +SYM_FUNC_START(twofish_dec_blk) + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + RET +SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S new file mode 100644 index 000000000..d2288bf38 --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -0,0 +1,305 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Twofish Cipher 3-way parallel algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + */ + +#include + +.file "twofish-x86_64-asm-3way.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 3-way twofish + **********************************************************************/ +#define CTX %rdi +#define RIO %rdx + +#define RAB0 %rax +#define RAB1 %rbx +#define RAB2 %rcx + +#define RAB0d %eax +#define RAB1d %ebx +#define RAB2d %ecx + +#define RAB0bh %ah +#define RAB1bh %bh +#define RAB2bh %ch + +#define RAB0bl %al +#define RAB1bl %bl +#define RAB2bl %cl + +#define CD0 0x0(%rsp) +#define CD1 0x8(%rsp) +#define CD2 0x10(%rsp) + +# used only before/after all rounds +#define RCD0 %r8 +#define RCD1 %r9 +#define RCD2 %r10 + +# used only during rounds +#define RX0 %r8 +#define RX1 %r9 +#define RX2 %r10 + +#define RX0d %r8d +#define RX1d %r9d +#define RX2d %r10d + +#define RY0 %r11 +#define RY1 %r12 +#define RY2 %r13 + +#define RY0d %r11d +#define RY1d %r12d +#define RY2d %r13d + +#define RT0 %rdx +#define RT1 %rsi + +#define RT0d %edx +#define RT1d %esi + +#define RT1bl %sil + +#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $(rot), ab; \ + op1##l T0(CTX, tmp2, 4), dst ## d; \ + op2##l T1(CTX, tmp1, 4), dst ## d; + +#define swap_ab_with_cd(ab, cd, tmp) \ + movq cd, tmp; \ + movq ab, cd; \ + movq tmp, ab; + +/* + * Combined G1 & G2 function. Reordered with help of rotates to have moves + * at beginning. + */ +#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ + /* G1,1 && G2,1 */ \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ + \ + /* G1,2 && G2,2 */ \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ + swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ + swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ + swap_ab_with_cd(ab ## 2, cd ## 2, RT0); + +#define enc_round_end(ab, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + xorl ab ## d, x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + shrq $32, ab; \ + roll $1, ab ## d; \ + xorl y ## d, ab ## d; \ + shlq $32, ab; \ + rorl $1, x ## d; \ + orq x, ab; + +#define dec_round_end(ba, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + xorl ba ## d, y ## d; \ + shrq $32, ba; \ + roll $1, ba ## d; \ + xorl x ## d, ba ## d; \ + shlq $32, ba; \ + rorl $1, y ## d; \ + orq y, ba; + +#define encrypt_round3(ab, cd, n) \ + g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ + \ + enc_round_end(ab ## 0, RX0, RY0, n); \ + enc_round_end(ab ## 1, RX1, RY1, n); \ + enc_round_end(ab ## 2, RX2, RY2, n); + +#define decrypt_round3(ba, dc, n) \ + g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ + \ + dec_round_end(ba ## 0, RX0, RY0, n); \ + dec_round_end(ba ## 1, RX1, RY1, n); \ + dec_round_end(ba ## 2, RX2, RY2, n); + +#define encrypt_cycle3(ab, cd, n) \ + encrypt_round3(ab, cd, n*2); \ + encrypt_round3(ab, cd, (n*2)+1); + +#define decrypt_cycle3(ba, dc, n) \ + decrypt_round3(ba, dc, (n*2)+1); \ + decrypt_round3(ba, dc, (n*2)); + +#define push_cd() \ + pushq RCD2; \ + pushq RCD1; \ + pushq RCD0; + +#define pop_cd() \ + popq RCD0; \ + popq RCD1; \ + popq RCD2; + +#define inpack3(in, n, xy, m) \ + movq 4*(n)(in), xy ## 0; \ + xorq w+4*m(CTX), xy ## 0; \ + \ + movq 4*(4+(n))(in), xy ## 1; \ + xorq w+4*m(CTX), xy ## 1; \ + \ + movq 4*(8+(n))(in), xy ## 2; \ + xorq w+4*m(CTX), xy ## 2; + +#define outunpack3(op, out, n, xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + op ## q xy ## 0, 4*(n)(out); \ + \ + xorq w+4*m(CTX), xy ## 1; \ + op ## q xy ## 1, 4*(4+(n))(out); \ + \ + xorq w+4*m(CTX), xy ## 2; \ + op ## q xy ## 2, 4*(8+(n))(out); + +#define inpack_enc3() \ + inpack3(RIO, 0, RAB, 0); \ + inpack3(RIO, 2, RCD, 2); + +#define outunpack_enc3(op) \ + outunpack3(op, RIO, 2, RAB, 6); \ + outunpack3(op, RIO, 0, RCD, 4); + +#define inpack_dec3() \ + inpack3(RIO, 0, RAB, 4); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + inpack3(RIO, 2, RCD, 6); \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; + +#define outunpack_dec3() \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; \ + outunpack3(mov, RIO, 0, RCD, 0); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + outunpack3(mov, RIO, 2, RAB, 2); + +SYM_FUNC_START(__twofish_enc_blk_3way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + * %rcx: bool, if true: xor output + */ + pushq %r13; + pushq %r12; + pushq %rbx; + + pushq %rcx; /* bool xor */ + pushq %rsi; /* dst */ + + inpack_enc3(); + + push_cd(); + encrypt_cycle3(RAB, CD, 0); + encrypt_cycle3(RAB, CD, 1); + encrypt_cycle3(RAB, CD, 2); + encrypt_cycle3(RAB, CD, 3); + encrypt_cycle3(RAB, CD, 4); + encrypt_cycle3(RAB, CD, 5); + encrypt_cycle3(RAB, CD, 6); + encrypt_cycle3(RAB, CD, 7); + pop_cd(); + + popq RIO; /* dst */ + popq RT1; /* bool xor */ + + testb RT1bl, RT1bl; + jnz .L__enc_xor3; + + outunpack_enc3(mov); + + popq %rbx; + popq %r12; + popq %r13; + RET; + +.L__enc_xor3: + outunpack_enc3(xor); + + popq %rbx; + popq %r12; + popq %r13; + RET; +SYM_FUNC_END(__twofish_enc_blk_3way) + +SYM_FUNC_START(twofish_dec_blk_3way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + */ + pushq %r13; + pushq %r12; + pushq %rbx; + + pushq %rsi; /* dst */ + + inpack_dec3(); + + push_cd(); + decrypt_cycle3(RAB, CD, 7); + decrypt_cycle3(RAB, CD, 6); + decrypt_cycle3(RAB, CD, 5); + decrypt_cycle3(RAB, CD, 4); + decrypt_cycle3(RAB, CD, 3); + decrypt_cycle3(RAB, CD, 2); + decrypt_cycle3(RAB, CD, 1); + decrypt_cycle3(RAB, CD, 0); + pop_cd(); + + popq RIO; /* dst */ + + outunpack_dec3(); + + popq %rbx; + popq %r12; + popq %r13; + RET; +SYM_FUNC_END(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S new file mode 100644 index 000000000..775af290c --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +***************************************************************************/ + +.file "twofish-x86_64-asm.S" +.text + +#include +#include + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0 %rax +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1 %rbx +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2 %rcx +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3 %rdx +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D; + +/* + * a input register containing a(rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define encrypt_last_round(a,b,c,d,round)\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + xor a, %r10;\ + movzx b ## H, %edi;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define decrypt_last_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + xor a, %r10;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + xor s3(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + ror $1, d ## D; + +SYM_FUNC_START(twofish_enc_blk) + pushq R1 + + /* %rdi contains the ctx address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + /* ctx address is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq (R3), R1 + movq 8(R3), R3 + input_whitening(R1,%r11,a_offset) + input_whitening(R3,%r11,c_offset) + mov R1D, R0D + rol $16, R0D + shr $32, R1 + mov R3D, R2D + shr $32, R3 + rol $1, R3D + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + + output_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + output_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movl $1,%eax + RET +SYM_FUNC_END(twofish_enc_blk) + +SYM_FUNC_START(twofish_dec_blk) + pushq R1 + + /* %rdi contains the ctx address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + /* ctx address is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq (R3), R1 + movq 8(R3), R3 + output_whitening(R1,%r11,a_offset) + output_whitening(R3,%r11,c_offset) + mov R1D, R0D + shr $32, R1 + rol $16, R1D + mov R3D, R2D + shr $32, R3 + rol $1, R2D + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + input_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movl $1,%eax + RET +SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish.h b/arch/x86/crypto/twofish.h new file mode 100644 index 000000000..12df400e6 --- /dev/null +++ b/arch/x86/crypto/twofish.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_TWOFISH_H +#define ASM_X86_TWOFISH_H + +#include +#include +#include + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src); + +/* helpers from twofish_x86_64-3way module */ +extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src); + +#endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 000000000..3eb3440b4 --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for AVX assembler version of Twofish Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * Copyright © 2013 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "twofish.h" +#include "ecb_cbc_helpers.h" + +#define TWOFISH_PARALLEL_BLOCKS 8 + +/* 8-way parallel cipher functions */ +asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_enc_8way); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_dec_8way); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_cbc_dec_8way); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); +} + +static struct skcipher_alg twofish_algs[] = { + { + .base.cra_name = "__ecb(twofish)", + .base.cra_driver_name = "__ecb-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(twofish)", + .base.cra_driver_name = "__cbc-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)]; + +static int __init twofish_init(void) +{ + const char *feature_name; + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(twofish_algs, + ARRAY_SIZE(twofish_algs), + twofish_simd_algs); +} + +static void __exit twofish_exit(void) +{ + simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs), + twofish_simd_algs); +} + +module_init(twofish_init); +module_exit(twofish_exit); + +MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("twofish"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c new file mode 100644 index 000000000..f9c4adc27 --- /dev/null +++ b/arch/x86/crypto/twofish_glue.c @@ -0,0 +1,100 @@ +/* + * Glue Code for assembler optimized version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_enc_blk); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_dec_blk); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init twofish_glue_init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit twofish_glue_fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(twofish_glue_init); +module_exit(twofish_glue_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c new file mode 100644 index 000000000..90454cf18 --- /dev/null +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Glue Code for 3-way parallel assembler optimized version of Twofish + * + * Copyright (c) 2011 Jussi Kivilinna + */ + +#include +#include +#include +#include +#include +#include + +#include "twofish.h" +#include "ecb_cbc_helpers.h" + +EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); +EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); + +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) +{ + u8 buf[2][TF_BLOCK_SIZE]; + const u8 *s = src; + + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + twofish_dec_blk_3way(ctx, dst, src); + crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf)); + +} +EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); + +static int ecb_encrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); +} + +static struct skcipher_alg tf_skciphers[] = { + { + .base.cra_name = "ecb(twofish)", + .base.cra_driver_name = "ecb-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(twofish)", + .base.cra_driver_name = "cbc-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x06 && + (boot_cpu_data.x86_model == 0x1c || + boot_cpu_data.x86_model == 0x26 || + boot_cpu_data.x86_model == 0x36)) { + /* + * On Atom, twofish-3way is slower than original assembler + * implementation. Twofish-3way trades off some performance in + * storing blocks in 64bit registers to allow three blocks to + * be processed parallel. Parallel operation then allows gaining + * more performance than was trade off, on out-of-order CPUs. + * However Atom does not benefit from this parallelism and + * should be blacklisted. + */ + return true; + } + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, twofish-3way is slower than original assembler + * implementation because excessive uses of 64bit rotate and + * left-shifts (which are really slow on P4) needed to store and + * handle 128bit block in two 64bit registers. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init twofish_3way_init(void) +{ + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "twofish-x86_64-3way: performance on this CPU " + "would be suboptimal: disabling " + "twofish-x86_64-3way.\n"); + return -ENODEV; + } + + return crypto_register_skciphers(tf_skciphers, + ARRAY_SIZE(tf_skciphers)); +} + +static void __exit twofish_3way_fini(void) +{ + crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); +} + +module_init(twofish_3way_init); +module_exit(twofish_3way_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile new file mode 100644 index 000000000..ca2fe1869 --- /dev/null +++ b/arch/x86/entry/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the x86 low level entry code +# + +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) + +CFLAGS_common.o += -fno-stack-protector + +obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o +obj-y += common.o + +obj-y += vdso/ +obj-y += vsyscall/ + +obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o +obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o +obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o + diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h new file mode 100644 index 000000000..f69076271 --- /dev/null +++ b/arch/x86/entry/calling.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + +#ifdef CONFIG_X86_64 + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: + */ + +.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + .else + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + .endif + pushq \rdx /* pt_regs->dx */ + pushq \rcx /* pt_regs->cx */ + pushq \rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS + + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif +.endm + +.macro CLEAR_REGS + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %esi, %esi /* nospec si */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + +.endm + +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 + PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret + CLEAR_REGS +.endm + +.macro POP_REGS pop_rdi=1 + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + .if \pop_rdi + popq %rdi + .endif +.endm + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +/* + * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * halves: + */ +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) + +.macro SET_NOFLUSH_BIT reg:req + bts $X86_CR3_PCID_NOFLUSH_BIT, \reg +.endm + +.macro ADJUST_KERNEL_CR3 reg:req + ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID + /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg +.endm + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + ADJUST_KERNEL_CR3 \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +#define THIS_CPU_user_pcid_flush_mask \ + PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask + +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Test if the ASID needs a flush. + */ + movq \scratch_reg, \scratch_reg2 + andq $(0x7FF), \scratch_reg /* mask ASID */ + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + /* Flush needed, clear the bit */ + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + movq \scratch_reg2, \scratch_reg + jmp .Lwrcr3_pcid_\@ + +.Lnoflush_\@: + movq \scratch_reg2, \scratch_reg + SET_NOFLUSH_BIT \scratch_reg + +.Lwrcr3_pcid_\@: + /* Flip the ASID to the user version */ + orq $(PTI_USER_PCID_MASK), \scratch_reg + +.Lwrcr3_\@: + /* Flip the PGD to the user version */ + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req + pushq %rax + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax + popq %rax +.endm + +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI + movq %cr3, \scratch_reg + movq \scratch_reg, \save_reg + /* + * Test the user pagetable bit. If set, then the user page tables + * are active. If clear CR3 already has the kernel page table + * active. + */ + bt $PTI_USER_PGTABLE_BIT, \scratch_reg + jnc .Ldone_\@ + + ADJUST_KERNEL_CR3 \scratch_reg + movq \scratch_reg, %cr3 + +.Ldone_\@: +.endm + +.macro RESTORE_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * KERNEL pages can always resume with NOFLUSH as we do + * explicit flushes. + */ + bt $PTI_USER_PGTABLE_BIT, \save_reg + jnc .Lnoflush_\@ + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: + /* + * The CR3 write could be avoided when not changing its value, + * but would require a CR3 read *and* a scratch register. + */ + movq \save_reg, %cr3 +.Lend_\@: +.endm + +#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req +.endm +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req +.endm +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req +.endm +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req +.endm +.macro RESTORE_CR3 scratch_reg:req save_reg:req +.endm + +#endif + +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to + * prevent a speculative swapgs when coming from kernel space. + * + * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, + * to prevent the swapgs from getting speculatively skipped when coming from + * user space. + */ +.macro FENCE_SWAPGS_USER_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER +.endm +.macro FENCE_SWAPGS_KERNEL_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL +.endm + +.macro STACKLEAK_ERASE_NOCLOBBER +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + PUSH_AND_CLEAR_REGS + call stackleak_erase + POP_REGS +#endif +.endm + +.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req + rdgsbase \save_reg + GET_PERCPU_BASE \scratch_reg + wrgsbase \scratch_reg +.endm + +#else /* CONFIG_X86_64 */ +# undef UNWIND_HINT_IRET_REGS +# define UNWIND_HINT_IRET_REGS +#endif /* !CONFIG_X86_64 */ + +.macro STACKLEAK_ERASE +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase +#endif +.endm + +#ifdef CONFIG_SMP + +/* + * CPU/node NR is loaded from the limit (size) field of a special segment + * descriptor entry in GDT. + */ +.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req + movq $__CPUNODE_SEG, \reg + lsl \reg, \reg +.endm + +/* + * Fetch the per-CPU GSBASE value for this processor and put it in @reg. + * We normally use %gs for accessing per-CPU data, but we are setting up + * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. + */ +.macro GET_PERCPU_BASE reg:req + LOAD_CPU_AND_NODE_SEG_LIMIT \reg + andq $VDSO_CPUNODE_MASK, \reg + movq __per_cpu_offset(, \reg, 8), \reg +.endm + +#else + +.macro GET_PERCPU_BASE reg:req + movq pcpu_unit_offsets(%rip), \reg +.endm + +#endif /* CONFIG_SMP */ diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c new file mode 100644 index 000000000..9c0b26ae5 --- /dev/null +++ b/arch/x86/entry/common.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * common.c - C code for kernel entry and exit + * Copyright (c) 2015 Andrew Lutomirski + * + * Based on asm and ptrace code by many authors. The code here originated + * in ptrace.c and signal.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_XEN_PV +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 + +static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) +{ + /* + * Convert negative numbers to very high and thus out of range + * numbers for comparisons. + */ + unsigned int unr = nr; + + if (likely(unr < NR_syscalls)) { + unr = array_index_nospec(unr, NR_syscalls); + regs->ax = sys_call_table[unr](regs); + return true; + } + return false; +} + +static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) +{ + /* + * Adjust the starting offset of the table, and convert numbers + * < __X32_SYSCALL_BIT to very high and thus out of range + * numbers for comparisons. + */ + unsigned int xnr = nr - __X32_SYSCALL_BIT; + + if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { + xnr = array_index_nospec(xnr, X32_NR_syscalls); + regs->ax = x32_sys_call_table[xnr](regs); + return true; + } + return false; +} + +__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr) +{ + add_random_kstack_offset(); + nr = syscall_enter_from_user_mode(regs, nr); + + instrumentation_begin(); + + if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { + /* Invalid system call, but still a system call. */ + regs->ax = __x64_sys_ni_syscall(regs); + } + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +static __always_inline int syscall_32_enter(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_IA32_EMULATION)) + current_thread_info()->status |= TS_COMPAT; + + return (int)regs->orig_ax; +} + +#ifdef CONFIG_IA32_EMULATION +bool __ia32_enabled __ro_after_init = true; +#endif + +/* + * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. + */ +static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) +{ + /* + * Convert negative numbers to very high and thus out of range + * numbers for comparisons. + */ + unsigned int unr = nr; + + if (likely(unr < IA32_NR_syscalls)) { + unr = array_index_nospec(unr, IA32_NR_syscalls); + regs->ax = ia32_sys_call_table[unr](regs); + } else if (nr != -1) { + regs->ax = __ia32_sys_ni_syscall(regs); + } +} + +#ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ +__visible noinstr void do_int80_syscall_32(struct pt_regs *regs) +{ + int nr = syscall_32_enter(regs); + + add_random_kstack_offset(); + /* + * Subtlety here: if ptrace pokes something larger than 2^31-1 into + * orig_ax, the int return value truncates it. This matches + * the semantics of syscall_get_nr(). + */ + nr = syscall_enter_from_user_mode(regs, nr); + instrumentation_begin(); + + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif /* !CONFIG_IA32_EMULATION */ + +static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) +{ + int nr = syscall_32_enter(regs); + int res; + + add_random_kstack_offset(); + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + + instrumentation_begin(); + /* Fetch EBP from where the vDSO stashed it. */ + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Micro-optimization: the pointer we're following is + * explicitly 32 bits, so it can't be out of range. + */ + res = __get_user(*(u32 *)®s->bp, + (u32 __user __force *)(unsigned long)(u32)regs->sp); + } else { + res = get_user(*(u32 *)®s->bp, + (u32 __user __force *)(unsigned long)(u32)regs->sp); + } + + if (res) { + /* User code screwed up. */ + regs->ax = -EFAULT; + + local_irq_disable(); + instrumentation_end(); + irqentry_exit_to_user_mode(regs); + return false; + } + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* Now this is just like a normal syscall. */ + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); + return true; +} + +/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ +__visible noinstr long do_fast_syscall_32(struct pt_regs *regs) +{ + /* + * Called using the internal vDSO SYSENTER/SYSCALL32 calling + * convention. Adjust regs so it looks like we entered using int80. + */ + unsigned long landing_pad = (unsigned long)current->mm->context.vdso + + vdso_image_32.sym_int80_landing_pad; + + /* + * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward + * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. + * Fix it up. + */ + regs->ip = landing_pad; + + /* Invoke the syscall. If it failed, keep it simple: use IRET. */ + if (!__do_fast_syscall_32(regs)) + return 0; + +#ifdef CONFIG_X86_64 + /* + * Opportunistic SYSRETL: if possible, try to return using SYSRETL. + * SYSRETL is available on all 64-bit CPUs, so we don't need to + * bother with SYSEXIT. + * + * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, + * because the ECX fixup above will ensure that this is essentially + * never the case. + */ + return regs->cs == __USER32_CS && regs->ss == __USER_DS && + regs->ip == landing_pad && + (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; +#else + /* + * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT. + * + * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, + * because the ECX fixup above will ensure that this is essentially + * never the case. + * + * We don't allow syscalls at all from VM86 mode, but we still + * need to check VM, because we might be returning from sys_vm86. + */ + return static_cpu_has(X86_FEATURE_SEP) && + regs->cs == __USER_CS && regs->ss == __USER_DS && + regs->ip == landing_pad && + (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; +#endif +} + +/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ +__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) +{ + /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ + regs->sp = regs->bp; + + /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ + regs->flags |= X86_EFLAGS_IF; + + return do_fast_syscall_32(regs); +} +#endif + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +#ifdef CONFIG_XEN_PV +#ifndef CONFIG_PREEMPTION +/* + * Some hypercalls issued by the toolstack can take many 10s of + * seconds. Allow tasks running hypercalls via the privcmd driver to + * be voluntarily preempted even if full kernel preemption is + * disabled. + * + * Such preemptible hypercalls are bracketed by + * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() + * calls. + */ +DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); +EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); + +/* + * In case of scheduling the flag must be cleared and restored after + * returning from schedule as the task might move to a different CPU. + */ +static __always_inline bool get_and_clear_inhcall(void) +{ + bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); + + __this_cpu_write(xen_in_preemptible_hcall, false); + return inhcall; +} + +static __always_inline void restore_inhcall(bool inhcall) +{ + __this_cpu_write(xen_in_preemptible_hcall, inhcall); +} +#else +static __always_inline bool get_and_clear_inhcall(void) { return false; } +static __always_inline void restore_inhcall(bool inhcall) { } +#endif + +static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + xen_evtchn_do_upcall(); + + set_irq_regs(old_regs); +} + +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + bool inhcall; + + instrumentation_begin(); + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { + irqentry_exit_cond_resched(); + instrumentation_end(); + restore_inhcall(inhcall); + } else { + instrumentation_end(); + irqentry_exit(regs, state); + } +} +#endif /* CONFIG_XEN_PV */ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000..bfb7bcb36 --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include +#include +#include + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S new file mode 100644 index 000000000..e309e7156 --- /dev/null +++ b/arch/x86/entry/entry_32.S @@ -0,0 +1,1252 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1991,1992 Linus Torvalds + * + * entry_32.S contains the system-call and low-level fault and trap handling routines. + * + * Stack layout while running C code: + * ptrace needs to have all registers on the stack. + * If the order here is changed, it needs to be + * updated in fork.c:copy_process(), signal.c:do_signal(), + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - %fs + * 28(%esp) - unused -- was %gs on old stackprotector kernels + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "calling.h" + + .section .entry.text, "ax" + +#define PTI_SWITCH_MASK (1 << PAGE_SHIFT) + +/* Unconditionally switch to user cr3 */ +.macro SWITCH_TO_USER_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + movl %cr3, \scratch_reg + orl $PTI_SWITCH_MASK, \scratch_reg + movl \scratch_reg, %cr3 +.Lend_\@: +.endm + +.macro BUG_IF_WRONG_CR3 no_user_check=0 +#ifdef CONFIG_DEBUG_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + .if \no_user_check == 0 + /* coming from usermode? */ + testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) + jz .Lend_\@ + .endif + /* On user-cr3? */ + movl %cr3, %eax + testl $PTI_SWITCH_MASK, %eax + jnz .Lend_\@ + /* From userspace with kernel cr3 - BUG */ + ud2 +.Lend_\@: +#endif +.endm + +/* + * Switch to kernel cr3 if not already loaded and return current cr3 in + * \scratch_reg + */ +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + movl %cr3, \scratch_reg + /* Test if we are already on kernel CR3 */ + testl $PTI_SWITCH_MASK, \scratch_reg + jz .Lend_\@ + andl $(~PTI_SWITCH_MASK), \scratch_reg + movl \scratch_reg, %cr3 + /* Return original CR3 in \scratch_reg */ + orl $PTI_SWITCH_MASK, \scratch_reg +.Lend_\@: +.endm + +#define CS_FROM_ENTRY_STACK (1 << 31) +#define CS_FROM_USER_CR3 (1 << 30) +#define CS_FROM_KERNEL (1 << 29) +#define CS_FROM_ESPFIX (1 << 28) + +.macro FIXUP_FRAME + /* + * The high bits of the CS dword (__csh) are used for CS_FROM_*. + * Clear them in case hardware didn't do this for us. + */ + andl $0x0000ffff, 4*4(%esp) + +#ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, 5*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ +#endif + testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ + + orl $CS_FROM_KERNEL, 4*4(%esp) + + /* + * When we're here from kernel mode; the (exception) stack looks like: + * + * 6*4(%esp) - + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs + * + * Lets build a 5 entry IRET frame after that, such that struct pt_regs + * is complete and in particular regs->sp is correct. This gives us + * the original 6 entries as gap: + * + * 14*4(%esp) - + * 13*4(%esp) - gap / flags + * 12*4(%esp) - gap / cs + * 11*4(%esp) - gap / ip + * 10*4(%esp) - gap / orig_eax + * 9*4(%esp) - gap / gs / function + * 8*4(%esp) - gap / fs + * 7*4(%esp) - ss + * 6*4(%esp) - sp + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs + */ + + pushl %ss # ss + pushl %esp # sp (points at ss) + addl $7*4, (%esp) # point sp back at the previous context + pushl 7*4(%esp) # flags + pushl 7*4(%esp) # cs + pushl 7*4(%esp) # ip + pushl 7*4(%esp) # orig_eax + pushl 7*4(%esp) # gs / function + pushl 7*4(%esp) # fs +.Lfrom_usermode_no_fixup_\@: +.endm + +.macro IRET_FRAME + /* + * We're called with %ds, %es, %fs, and %gs from the interrupted + * frame, so we shouldn't use them. Also, we may be in ESPFIX + * mode and therefore have a nonzero SS base and an offset ESP, + * so any attempt to access the stack needs to use SS. (except for + * accesses through %esp, which automatically use SS.) + */ + testl $CS_FROM_KERNEL, 1*4(%esp) + jz .Lfinished_frame_\@ + + /* + * Reconstruct the 3 entry IRET frame right after the (modified) + * regs->sp without lowering %esp in between, such that an NMI in the + * middle doesn't scribble our stack. + */ + pushl %eax + pushl %ecx + movl 5*4(%esp), %eax # (modified) regs->sp + + movl 4*4(%esp), %ecx # flags + movl %ecx, %ss:-1*4(%eax) + + movl 3*4(%esp), %ecx # cs + andl $0x0000ffff, %ecx + movl %ecx, %ss:-2*4(%eax) + + movl 2*4(%esp), %ecx # ip + movl %ecx, %ss:-3*4(%eax) + + movl 1*4(%esp), %ecx # eax + movl %ecx, %ss:-4*4(%eax) + + popl %ecx + lea -4*4(%eax), %esp + popl %eax +.Lfinished_frame_\@: +.endm + +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 + cld +.if \skip_gs == 0 + pushl $0 +.endif + pushl %fs + + pushl %eax + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs +.if \unwind_espfix > 0 + UNWIND_ESPFIX_STACK +.endif + popl %eax + + FIXUP_FRAME + pushl %es + pushl %ds + pushl \pt_regs_ax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es + /* Switch to kernel stack if necessary */ +.if \switch_stacks > 0 + SWITCH_TO_KERNEL_STACK +.endif +.endm + +.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 + SAVE_ALL unwind_espfix=\unwind_espfix + + BUG_IF_WRONG_CR3 + + /* + * Now switch the CR3 when PTI is enabled. + * + * We can enter with either user or kernel cr3, the code will + * store the old cr3 in \cr3_reg and switches to the kernel cr3 + * if necessary. + */ + SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg + +.Lend_\@: +.endm + +.macro RESTORE_INT_REGS + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax +.endm + +.macro RESTORE_REGS pop=0 + RESTORE_INT_REGS +1: popl %ds +2: popl %es +3: popl %fs +4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */ + IRET_FRAME + + /* + * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is + * ASM the registers are known and we can trivially hard-code them. + */ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS) + _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES) + _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS) +.endm + +.macro RESTORE_ALL_NMI cr3_reg:req pop=0 + /* + * Now switch the CR3 when PTI is enabled. + * + * We enter with kernel cr3 and switch the cr3 to the value + * stored on \cr3_reg, which is either a user or a kernel cr3. + */ + ALTERNATIVE "jmp .Lswitched_\@", "", X86_FEATURE_PTI + + testl $PTI_SWITCH_MASK, \cr3_reg + jz .Lswitched_\@ + + /* User cr3 in \cr3_reg - write it to hardware cr3 */ + movl \cr3_reg, %cr3 + +.Lswitched_\@: + + BUG_IF_WRONG_CR3 + + RESTORE_REGS pop=\pop +.endm + +.macro CHECK_AND_APPLY_ESPFIX +#ifdef CONFIG_X86_ESPFIX32 +#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET + + ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX + + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + /* + * Warning: PT_OLDSS(%esp) contains the wrong/random values if we + * are returning to the kernel. + * See comments in process.c:copy_thread() for details. + */ + movb PT_OLDSS(%esp), %ah + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + jne .Lend_\@ # returning to user-space with LDT SS + + /* + * Setup and switch to ESPFIX stack + * + * We're returning to userspace with a 16 bit stack. The CPU will not + * restore the high word of ESP for us on executing iret... This is an + * "official" bug of all the x86-compatible CPUs, which we can work + * around to make dosemu and wine happy. We do this by preloading the + * high word of ESP with the high word of the userspace ESP while + * compensating for the offset by changing to the ESPFIX segment with + * a base address that matches for the difference. + */ + mov %esp, %edx /* load kernel esp */ + mov PT_OLDESP(%esp), %eax /* load userspace esp */ + mov %dx, %ax /* eax: new kernel esp */ + sub %eax, %edx /* offset (low word is 0) */ + shr $16, %edx + mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ + mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ + pushl $__ESPFIX_SS + pushl %eax /* new kernel esp */ + /* + * Disable interrupts, but do not irqtrace this section: we + * will soon execute iret and the tracer was already set to + * the irqstate after the IRET: + */ + cli + lss (%esp), %esp /* switch to espfix segment */ +.Lend_\@: +#endif /* CONFIG_X86_ESPFIX32 */ +.endm + +/* + * Called with pt_regs fully populated and kernel segments loaded, + * so we can access PER_CPU and use the integer registers. + * + * We need to be very careful here with the %esp switch, because an NMI + * can happen everywhere. If the NMI handler finds itself on the + * entry-stack, it will overwrite the task-stack and everything we + * copied there. So allocate the stack-frame on the task-stack and + * switch to it before we do any copying. + */ + +.macro SWITCH_TO_KERNEL_STACK + + BUG_IF_WRONG_CR3 + + SWITCH_TO_KERNEL_CR3 scratch_reg=%eax + + /* + * %eax now contains the entry cr3 and we carry it forward in + * that register for the time this macro runs + */ + + /* Are we on the entry stack? Bail out if not! */ + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %esp, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx + jae .Lend_\@ + + /* Load stack pointer into %esi and %edi */ + movl %esp, %esi + movl %esi, %edi + + /* Move %edi to the top of the entry stack */ + andl $(MASK_entry_stack), %edi + addl $(SIZEOF_entry_stack), %edi + + /* Load top of task-stack into %edi */ + movl TSS_entry2task_stack(%edi), %edi + + /* Special case - entry from kernel mode via entry stack */ +#ifdef CONFIG_VM86 + movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS + movb PT_CS(%esp), %cl + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx +#else + movl PT_CS(%esp), %ecx + andl $SEGMENT_RPL_MASK, %ecx +#endif + cmpl $USER_RPL, %ecx + jb .Lentry_from_kernel_\@ + + /* Bytes to copy */ + movl $PTREGS_SIZE, %ecx + +#ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esi) + jz .Lcopy_pt_regs_\@ + + /* + * Stack-frame contains 4 additional segment registers when + * coming from VM86 mode + */ + addl $(4 * 4), %ecx + +#endif +.Lcopy_pt_regs_\@: + + /* Allocate frame on task-stack */ + subl %ecx, %edi + + /* Switch to task-stack */ + movl %edi, %esp + + /* + * We are now on the task-stack and can safely copy over the + * stack-frame + */ + shrl $2, %ecx + cld + rep movsl + + jmp .Lend_\@ + +.Lentry_from_kernel_\@: + + /* + * This handles the case when we enter the kernel from + * kernel-mode and %esp points to the entry-stack. When this + * happens we need to switch to the task-stack to run C code, + * but switch back to the entry-stack again when we approach + * iret and return to the interrupted code-path. This usually + * happens when we hit an exception while restoring user-space + * segment registers on the way back to user-space or when the + * sysenter handler runs with eflags.tf set. + * + * When we switch to the task-stack here, we can't trust the + * contents of the entry-stack anymore, as the exception handler + * might be scheduled out or moved to another CPU. Therefore we + * copy the complete entry-stack to the task-stack and set a + * marker in the iret-frame (bit 31 of the CS dword) to detect + * what we've done on the iret path. + * + * On the iret path we copy everything back and switch to the + * entry-stack, so that the interrupted kernel code-path + * continues on the same stack it was interrupted with. + * + * Be aware that an NMI can happen anytime in this code. + * + * %esi: Entry-Stack pointer (same as %esp) + * %edi: Top of the task stack + * %eax: CR3 on kernel entry + */ + + /* Calculate number of bytes on the entry stack in %ecx */ + movl %esi, %ecx + + /* %ecx to the top of entry-stack */ + andl $(MASK_entry_stack), %ecx + addl $(SIZEOF_entry_stack), %ecx + + /* Number of bytes on the entry stack to %ecx */ + sub %esi, %ecx + + /* Mark stackframe as coming from entry stack */ + orl $CS_FROM_ENTRY_STACK, PT_CS(%esp) + + /* + * Test the cr3 used to enter the kernel and add a marker + * so that we can switch back to it before iret. + */ + testl $PTI_SWITCH_MASK, %eax + jz .Lcopy_pt_regs_\@ + orl $CS_FROM_USER_CR3, PT_CS(%esp) + + /* + * %esi and %edi are unchanged, %ecx contains the number of + * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate + * the stack-frame on task-stack and copy everything over + */ + jmp .Lcopy_pt_regs_\@ + +.Lend_\@: +.endm + +/* + * Switch back from the kernel stack to the entry stack. + * + * The %esp register must point to pt_regs on the task stack. It will + * first calculate the size of the stack-frame to copy, depending on + * whether we return to VM86 mode or not. With that it uses 'rep movsl' + * to copy the contents of the stack over to the entry stack. + * + * We must be very careful here, as we can't trust the contents of the + * task-stack once we switched to the entry-stack. When an NMI happens + * while on the entry-stack, the NMI handler will switch back to the top + * of the task stack, overwriting our stack-frame we are about to copy. + * Therefore we switch the stack only after everything is copied over. + */ +.macro SWITCH_TO_ENTRY_STACK + + /* Bytes to copy */ + movl $PTREGS_SIZE, %ecx + +#ifdef CONFIG_VM86 + testl $(X86_EFLAGS_VM), PT_EFLAGS(%esp) + jz .Lcopy_pt_regs_\@ + + /* Additional 4 registers to copy when returning to VM86 mode */ + addl $(4 * 4), %ecx + +.Lcopy_pt_regs_\@: +#endif + + /* Initialize source and destination for movsl */ + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi + subl %ecx, %edi + movl %esp, %esi + + /* Save future stack pointer in %ebx */ + movl %edi, %ebx + + /* Copy over the stack-frame */ + shrl $2, %ecx + cld + rep movsl + + /* + * Switch to entry-stack - needs to happen after everything is + * copied because the NMI handler will overwrite the task-stack + * when on entry-stack + */ + movl %ebx, %esp + +.Lend_\@: +.endm + +/* + * This macro handles the case when we return to kernel-mode on the iret + * path and have to switch back to the entry stack and/or user-cr3 + * + * See the comments below the .Lentry_from_kernel_\@ label in the + * SWITCH_TO_KERNEL_STACK macro for more details. + */ +.macro PARANOID_EXIT_TO_KERNEL_MODE + + /* + * Test if we entered the kernel with the entry-stack. Most + * likely we did not, because this code only runs on the + * return-to-kernel path. + */ + testl $CS_FROM_ENTRY_STACK, PT_CS(%esp) + jz .Lend_\@ + + /* Unlikely slow-path */ + + /* Clear marker from stack-frame */ + andl $(~CS_FROM_ENTRY_STACK), PT_CS(%esp) + + /* Copy the remaining task-stack contents to entry-stack */ + movl %esp, %esi + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi + + /* Bytes on the task-stack to ecx */ + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx + subl %esi, %ecx + + /* Allocate stack-frame on entry-stack */ + subl %ecx, %edi + + /* + * Save future stack-pointer, we must not switch until the + * copy is done, otherwise the NMI handler could destroy the + * contents of the task-stack we are about to copy. + */ + movl %edi, %ebx + + /* Do the copy */ + shrl $2, %ecx + cld + rep movsl + + /* Safe to switch to entry-stack now */ + movl %ebx, %esp + + /* + * We came from entry-stack and need to check if we also need to + * switch back to user cr3. + */ + testl $CS_FROM_USER_CR3, PT_CS(%esp) + jz .Lend_\@ + + /* Clear marker from stack-frame */ + andl $(~CS_FROM_USER_CR3), PT_CS(%esp) + + SWITCH_TO_USER_CR3 scratch_reg=%eax + +.Lend_\@: +.endm + +/** + * idtentry - Macro to generate entry stubs for simple IDT entries + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + */ +.macro idtentry vector asmsym cfunc has_error_code:req +SYM_CODE_START(\asmsym) + ASM_CLAC + cld + + .if \has_error_code == 0 + pushl $0 /* Clear the error code */ + .endif + + /* Push the C-function address into the GS slot */ + pushl $\cfunc + /* Invoke the common exception entry */ + jmp handle_exception +SYM_CODE_END(\asmsym) +.endm + +.macro idtentry_irq vector cfunc + .p2align CONFIG_X86_L1_CACHE_SHIFT +SYM_CODE_START_LOCAL(asm_\cfunc) + ASM_CLAC + SAVE_ALL switch_stacks=1 + ENCODE_FRAME_POINTER + movl %esp, %eax + movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ + movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ + call \cfunc + jmp handle_exception_return +SYM_CODE_END(asm_\cfunc) +.endm + +.macro idtentry_sysvec vector cfunc + idtentry \vector asm_\cfunc \cfunc has_error_code=0 +.endm + +/* + * Include the defines which emit the idt entries which are shared + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. + */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + +#include + + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + +/* + * %eax: prev task + * %edx: next task + */ +.pushsection .text, "ax" +SYM_CODE_START(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in struct inactive_task_frame + */ + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + /* + * Flags are saved to prevent AC leakage. This could go + * away if objtool would have 32bit support to verify + * the STAC/CLAC correctness. + */ + pushfl + + /* switch stack */ + movl %esp, TASK_threadsp(%eax) + movl TASK_threadsp(%edx), %esp + +#ifdef CONFIG_STACKPROTECTOR + movl TASK_stack_canary(%edx), %ebx + movl %ebx, PER_CPU_VAR(__stack_chk_guard) +#endif + + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + + /* Restore flags or the incoming task to restore AC state. */ + popfl + /* restore callee-saved registers */ + popl %esi + popl %edi + popl %ebx + popl %ebp + + jmp __switch_to +SYM_CODE_END(__switch_to_asm) +.popsection + +/* + * The unwinder expects the last frame on the stack to always be at the same + * offset from the end of the page, which allows it to validate the stack. + * Calling schedule_tail() directly would break that convention because its an + * asmlinkage function so its argument has to be pushed on the stack. This + * wrapper creates a proper "end of stack" frame header before the call. + */ +.pushsection .text, "ax" +SYM_FUNC_START(schedule_tail_wrapper) + FRAME_BEGIN + + pushl %eax + call schedule_tail + popl %eax + + FRAME_END + RET +SYM_FUNC_END(schedule_tail_wrapper) +.popsection + +/* + * A newly forked process directly context switches into this address. + * + * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg + */ +.pushsection .text, "ax" +SYM_CODE_START(ret_from_fork) + call schedule_tail_wrapper + + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: + /* When we fork, we trace the syscall return in the child, too. */ + movl %esp, %eax + call syscall_exit_to_user_mode + jmp .Lsyscall_32_done + + /* kernel thread */ +1: movl %edi, %eax + CALL_NOSPEC ebx + /* + * A kernel thread is allowed to return here after successfully + * calling kernel_execve(). Exit to userspace to complete the execve() + * syscall. + */ + movl $0, PT_EAX(%esp) + jmp 2b +SYM_CODE_END(ret_from_fork) +.popsection + +SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) +/* + * All code from here through __end_SYSENTER_singlestep_region is subject + * to being single-stepped if a user program sets TF and executes SYSENTER. + * There is absolutely nothing that we can do to prevent this from happening + * (thanks Intel!). To keep our handling of this situation as simple as + * possible, we handle TF just like AC and NT, except that our #DB handler + * will ignore all of the single-step traps generated in this range. + */ + +/* + * 32-bit SYSENTER entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * if X86_FEATURE_SEP is available. This is the preferred system call + * entry on 32-bit systems. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). + * SYSENTER does not save anything on the stack, + * and does not save old EIP (!!!), ESP, or EFLAGS. + * + * To avoid losing track of EFLAGS.VM (and thus potentially corrupting + * user and/or vm86 state), we explicitly disable the SYSENTER + * instruction in vm86 mode by reprogramming the MSRs. + * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp user stack + * 0(%ebp) arg6 + */ +SYM_FUNC_START(entry_SYSENTER_32) + /* + * On entry-stack with all userspace-regs live - save and + * restore eflags and %eax to use it as scratch-reg for the cr3 + * switch. + */ + pushfl + pushl %eax + BUG_IF_WRONG_CR3 no_user_check=1 + SWITCH_TO_KERNEL_CR3 scratch_reg=%eax + popl %eax + popfl + + /* Stack empty again, switch to task stack */ + movl TSS_entry2task_stack(%esp), %esp + +.Lsysenter_past_esp: + pushl $__USER_DS /* pt_regs->ss */ + pushl $0 /* pt_regs->sp (placeholder) */ + pushfl /* pt_regs->flags (except IF = 0) */ + pushl $__USER_CS /* pt_regs->cs */ + pushl $0 /* pt_regs->ip = 0 (placeholder) */ + pushl %eax /* pt_regs->orig_ax */ + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest, stack already switched */ + + /* + * SYSENTER doesn't filter flags, so we need to clear NT, AC + * and TF ourselves. To save a few cycles, we can check whether + * either was set instead of doing an unconditional popfq. + * This needs to happen before enabling interrupts so that + * we don't get preempted with NT set. + * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * + * NB.: .Lsysenter_fix_flags is a label with the code under it moved + * out-of-line as an optimization: NT is unlikely to be set in the + * majority of the cases and instead of polluting the I$ unnecessarily, + * we're keeping that code behind a branch which will predict as + * not-taken and therefore its instructions won't be fetched. + */ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + movl %esp, %eax + call do_SYSENTER_32 + testl %eax, %eax + jz .Lsyscall_32_done + + STACKLEAK_ERASE + + /* Opportunistic SYSEXIT */ + + /* + * Setup entry stack - we keep the pointer in %eax and do the + * switch after almost all user-state is restored. + */ + + /* Load entry stack pointer and allocate frame for eflags/eax */ + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax + subl $(2*4), %eax + + /* Copy eflags and eax to entry stack */ + movl PT_EFLAGS(%esp), %edi + movl PT_EAX(%esp), %esi + movl %edi, (%eax) + movl %esi, 4(%eax) + + /* Restore user registers and segments */ + movl PT_EIP(%esp), %edx /* pt_regs->ip */ + movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ +1: mov PT_FS(%esp), %fs + + popl %ebx /* pt_regs->bx */ + addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + + /* Switch to entry stack */ + movl %eax, %esp + + /* Now ready to switch the cr3 */ + SWITCH_TO_USER_CR3 scratch_reg=%eax + + /* + * Restore all flags except IF. (We restore IF separately because + * STI gives a one-instruction window in which we won't be interrupted, + * whereas POPF does not.) + */ + btrl $X86_EFLAGS_IF_BIT, (%esp) + BUG_IF_WRONG_CR3 no_user_check=1 + popfl + popl %eax + + /* + * Return back to the vDSO, which will pop ecx and edx. + * Don't bother with DS and ES (they already contain __USER_DS). + */ + sti + sysexit + +2: movl $0, PT_FS(%esp) + jmp 1b + _ASM_EXTABLE(1b, 2b) + +.Lsysenter_fix_flags: + pushl $X86_EFLAGS_FIXED + popfl + jmp .Lsysenter_flags_fixed +SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) +SYM_FUNC_END(entry_SYSENTER_32) + +/* + * 32-bit legacy system call entry. + * + * 32-bit x86 Linux system calls traditionally used the INT $0x80 + * instruction. INT $0x80 lands here. + * + * This entry point can be used by any 32-bit perform system calls. + * Instances of INT $0x80 can be found inline in various programs and + * libraries. It is also used by the vDSO's __kernel_vsyscall + * fallback for hardware that doesn't support a faster entry method. + * Restarted 32-bit system calls also fall back to INT $0x80 + * regardless of what instruction was originally used to do the system + * call. (64-bit programs can use INT $0x80 as well, but they can + * only run on 64-bit kernels and therefore land in + * entry_INT80_compat.) + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp arg6 + */ +SYM_FUNC_START(entry_INT80_32) + ASM_CLAC + pushl %eax /* pt_regs->orig_ax */ + + SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */ + + movl %esp, %eax + call do_int80_syscall_32 +.Lsyscall_32_done: + STACKLEAK_ERASE + +restore_all_switch_stack: + SWITCH_TO_ENTRY_STACK + CHECK_AND_APPLY_ESPFIX + + /* Switch back to user CR3 */ + SWITCH_TO_USER_CR3 scratch_reg=%eax + + BUG_IF_WRONG_CR3 + + /* Restore user state */ + RESTORE_REGS pop=4 # skip orig_eax/error_code +.Lirq_return: + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler and when returning from + * scheduler to user-space. + */ + iret + +.Lasm_iret_error: + pushl $0 # no error code + pushl $iret_error + +#ifdef CONFIG_DEBUG_ENTRY + /* + * The stack-frame here is the one that iret faulted on, so its a + * return-to-user frame. We are on kernel-cr3 because we come here from + * the fixup code. This confuses the CR3 checker, so switch to user-cr3 + * as the checker expects it. + */ + pushl %eax + SWITCH_TO_USER_CR3 scratch_reg=%eax + popl %eax +#endif + + jmp handle_exception + + _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error) +SYM_FUNC_END(entry_INT80_32) + +.macro FIXUP_ESPFIX_STACK +/* + * Switch back for ESPFIX stack to the normal zerobased stack + * + * We can't call C functions using the ESPFIX stack. This code reads + * the high word of the segment base from the GDT and swiches to the + * normal stack and adjusts ESP with the matching offset. + * + * We might be on user CR3 here, so percpu data is not mapped and we can't + * access the GDT through the percpu segment. Instead, use SGDT to find + * the cpu_entry_area alias of the GDT. + */ +#ifdef CONFIG_X86_ESPFIX32 + /* fixup the stack */ + pushl %ecx + subl $2*4, %esp + sgdt (%esp) + movl 2(%esp), %ecx /* GDT address */ + /* + * Careful: ECX is a linear pointer, so we need to force base + * zero. %cs is the only known-linear segment we have right now. + */ + mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ + mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ + shl $16, %eax + addl $2*4, %esp + popl %ecx + addl %esp, %eax /* the adjusted stack pointer */ + pushl $__KERNEL_DS + pushl %eax + lss (%esp), %esp /* switch to the normal stack segment */ +#endif +.endm + +.macro UNWIND_ESPFIX_STACK + /* It's safe to clobber %eax, all other regs need to be preserved */ +#ifdef CONFIG_X86_ESPFIX32 + movl %ss, %eax + /* see if on espfix stack */ + cmpw $__ESPFIX_SS, %ax + jne .Lno_fixup_\@ + /* switch to normal stack */ + FIXUP_ESPFIX_STACK +.Lno_fixup_\@: +#endif +.endm + +SYM_CODE_START_LOCAL_NOALIGN(handle_exception) + /* the function address is in %gs's slot on the stack */ + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 + ENCODE_FRAME_POINTER + + movl PT_GS(%esp), %edi # get the function address + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + + movl %esp, %eax # pt_regs pointer + CALL_NOSPEC edi + +handle_exception_return: +#ifdef CONFIG_VM86 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax +#else + /* + * We can be coming here from child spawned by kernel_thread(). + */ + movl PT_CS(%esp), %eax + andl $SEGMENT_RPL_MASK, %eax +#endif + cmpl $USER_RPL, %eax # returning to v8086 or userspace ? + jnb ret_to_user + + PARANOID_EXIT_TO_KERNEL_MODE + BUG_IF_WRONG_CR3 + RESTORE_REGS 4 + jmp .Lirq_return + +ret_to_user: + movl %esp, %eax + jmp restore_all_switch_stack +SYM_CODE_END(handle_exception) + +SYM_CODE_START(asm_exc_double_fault) +1: + /* + * This is a task gate handler, not an interrupt gate handler. + * The error code is on the stack, but the stack is otherwise + * empty. Interrupts are off. Our state is sane with the following + * exceptions: + * + * - CR0.TS is set. "TS" literally means "task switched". + * - EFLAGS.NT is set because we're a "nested task". + * - The doublefault TSS has back_link set and has been marked busy. + * - TR points to the doublefault TSS and the normal TSS is busy. + * - CR3 is the normal kernel PGD. This would be delightful, except + * that the CPU didn't bother to save the old CR3 anywhere. This + * would make it very awkward to return back to the context we came + * from. + * + * The rest of EFLAGS is sanitized for us, so we don't need to + * worry about AC or DF. + * + * Don't even bother popping the error code. It's always zero, + * and ignoring it makes us a bit more robust against buggy + * hypervisor task gate implementations. + * + * We will manually undo the task switch instead of doing a + * task-switching IRET. + */ + + clts /* clear CR0.TS */ + pushl $X86_EFLAGS_FIXED + popfl /* clear EFLAGS.NT */ + + call doublefault_shim + + /* We don't support returning, so we have no IRET here. */ +1: + hlt + jmp 1b +SYM_CODE_END(asm_exc_double_fault) + +/* + * NMI is doubly nasty. It can happen on the first instruction of + * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning + * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32 + * switched stacks. We handle both conditions by simply checking whether we + * interrupted kernel code running on the SYSENTER stack. + */ +SYM_CODE_START(asm_exc_nmi) + ASM_CLAC + +#ifdef CONFIG_X86_ESPFIX32 + /* + * ESPFIX_SS is only ever set on the return to user path + * after we've switched to the entry stack. + */ + pushl %eax + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + je .Lnmi_espfix_stack +#endif + + pushl %eax # pt_regs->orig_ax + SAVE_ALL_NMI cr3_reg=%edi + ENCODE_FRAME_POINTER + xorl %edx, %edx # zero error code + movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? */ + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx + jb .Lnmi_from_sysenter_stack + + /* Not on SYSENTER stack. */ + call exc_nmi + jmp .Lnmi_return + +.Lnmi_from_sysenter_stack: + /* + * We're on the SYSENTER stack. Switch off. No one (not even debug) + * is using the thread stack right now, so it's safe for us to use it. + */ + movl %esp, %ebx + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + call exc_nmi + movl %ebx, %esp + +.Lnmi_return: +#ifdef CONFIG_X86_ESPFIX32 + testl $CS_FROM_ESPFIX, PT_CS(%esp) + jnz .Lnmi_from_espfix +#endif + + CHECK_AND_APPLY_ESPFIX + RESTORE_ALL_NMI cr3_reg=%edi pop=4 + jmp .Lirq_return + +#ifdef CONFIG_X86_ESPFIX32 +.Lnmi_espfix_stack: + /* + * Create the pointer to LSS back + */ + pushl %ss + pushl %esp + addl $4, (%esp) + + /* Copy the (short) IRET frame */ + pushl 4*4(%esp) # flags + pushl 4*4(%esp) # cs + pushl 4*4(%esp) # ip + + pushl %eax # orig_ax + + SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 + ENCODE_FRAME_POINTER + + /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ + xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) + + xorl %edx, %edx # zero error code + movl %esp, %eax # pt_regs pointer + jmp .Lnmi_from_sysenter_stack + +.Lnmi_from_espfix: + RESTORE_ALL_NMI cr3_reg=%edi + /* + * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to + * fix up the gap and long frame: + * + * 3 - original frame (exception) + * 2 - ESPFIX block (above) + * 6 - gap (FIXUP_FRAME) + * 5 - long frame (FIXUP_FRAME) + * 1 - orig_ax + */ + lss (1+5+6)*4(%esp), %esp # back to espfix stack + jmp .Lirq_return +#endif +SYM_CODE_END(asm_exc_nmi) + +.pushsection .text, "ax" +SYM_CODE_START(rewind_stack_and_make_dead) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + + call make_task_dead +1: jmp 1b +SYM_CODE_END(rewind_stack_and_make_dead) +.popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S new file mode 100644 index 000000000..9953d966d --- /dev/null +++ b/arch/x86/entry/entry_64.S @@ -0,0 +1,1530 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/x86_64/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs + * Copyright (C) 2000 Pavel Machek + * + * entry.S contains the system-call and fault low-level handling routines. + * + * Some of this is documented in Documentation/x86/entry_64.rst + * + * A note on terminology: + * - iret frame: Architecture defined interrupt frame from SS to RIP + * at the top of the kernel process stack. + * + * Some macro usage: + * - SYM_FUNC_START/END:Define functions in the symbol table. + * - idtentry: Define exception entry points. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "calling.h" + +.code64 +.section .entry.text, "ax" + +/* + * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. + * + * This is the only entry point used for 64-bit system calls. The + * hardware interface is reasonably well designed and the register to + * argument mapping Linux uses fits well with the registers that are + * available when SYSCALL is used. + * + * SYSCALL instructions can be found inlined in libc implementations as + * well as some other programs and libraries. There are also a handful + * of SYSCALL instructions in the vDSO used, for example, as a + * clock_gettimeofday fallback. + * + * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, + * then loads new ss, cs, and rip from previously programmed MSRs. + * rflags gets masked by a value from another MSR (so CLD and CLAC + * are not needed). SYSCALL does not save anything on the stack + * and does not change rsp. + * + * Registers on entry: + * rax system call number + * rcx return address + * r11 saved rflags (note: r11 is callee-clobbered register in C ABI) + * rdi arg0 + * rsi arg1 + * rdx arg2 + * r10 arg3 (needs to be moved to rcx to conform to C ABI) + * r8 arg4 + * r9 arg5 + * (note: r12-r15, rbp, rbx are callee-preserved in C ABI) + * + * Only called from user space. + * + * When user can change pt_regs->foo always force IRET. That is because + * it deals with uncanonical addresses better. SYSRET has trouble + * with them due to bugs in both AMD and Intel CPUs. + */ + +SYM_CODE_START(entry_SYSCALL_64) + UNWIND_HINT_ENTRY + ENDBR + + swapgs + /* tss.sp2 is scratch space. */ + movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + +SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ + pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ +SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + pushq %rax /* pt_regs->orig_ax */ + + PUSH_AND_CLEAR_REGS rax=$-ENOSYS + + /* IRQs are off. */ + movq %rsp, %rdi + /* Sign extend the lower 32bit as syscall numbers are treated as int */ + movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + + call do_syscall_64 /* returns with IRQs disabled */ + + /* + * Try to use SYSRET instead of IRET if we're returning to + * a completely clean 64-bit userspace context. If we're not, + * go to the slow exit path. + * In the Xen PV case we must use iret anyway. + */ + + ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \ + X86_FEATURE_XENPV + + movq RCX(%rsp), %rcx + movq RIP(%rsp), %r11 + + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ + jne swapgs_restore_regs_and_return_to_usermode + + /* + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. + * + * If width of "canonical tail" ever becomes variable, this will need + * to be updated to remain correct on both old and new CPUs. + * + * Change top bits to match most significant bit (47th or 56th bit + * depending on paging mode) in the address. + */ +#ifdef CONFIG_X86_5LEVEL + ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \ + "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57 +#else + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx +#endif + + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne swapgs_restore_regs_and_return_to_usermode + + cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ + jne swapgs_restore_regs_and_return_to_usermode + + movq R11(%rsp), %r11 + cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ + jne swapgs_restore_regs_and_return_to_usermode + + /* + * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot + * restore RF properly. If the slowpath sets it for whatever reason, we + * need to restore it correctly. + * + * SYSRET can restore TF, but unlike IRET, restoring TF results in a + * trap from userspace immediately after SYSRET. This would cause an + * infinite loop whenever #DB happens with register state that satisfies + * the opportunistic SYSRET conditions. For example, single-stepping + * this user code: + * + * movq $stuck_here, %rcx + * pushfq + * popq %r11 + * stuck_here: + * + * would never get past 'stuck_here'. + */ + testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 + jnz swapgs_restore_regs_and_return_to_usermode + + /* nothing to check for RSP */ + + cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ + jne swapgs_restore_regs_and_return_to_usermode + + /* + * We win! This label is here just for ease of understanding + * perf profiles. Nothing jumps here. + */ +syscall_return_via_sysret: + IBRS_EXIT + POP_REGS pop_rdi=0 + + /* + * Now all regs are restored except RSP and RDI. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + STACKLEAK_ERASE_NOCLOBBER + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + + popq %rdi + popq %rsp +SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + swapgs + sysretq +SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 +SYM_CODE_END(entry_SYSCALL_64) + +/* + * %rdi: prev task + * %rsi: next task + */ +.pushsection .text, "ax" +SYM_FUNC_START(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in inactive_task_frame + */ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* switch stack */ + movq %rsp, TASK_threadsp(%rdi) + movq TASK_threadsp(%rsi), %rsp + +#ifdef CONFIG_STACKPROTECTOR + movq TASK_stack_canary(%rsi), %rbx + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset +#endif + + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + + /* restore callee-saved registers */ + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + + jmp __switch_to +SYM_FUNC_END(__switch_to_asm) +.popsection + +/* + * A newly forked process directly context switches into this address. + * + * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg + */ +.pushsection .text, "ax" +SYM_CODE_START(ret_from_fork) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread + movq %rax, %rdi + call schedule_tail /* rdi: 'prev' task parameter */ + + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ + +2: + UNWIND_HINT_REGS + movq %rsp, %rdi + call syscall_exit_to_user_mode /* returns with IRQs disabled */ + jmp swapgs_restore_regs_and_return_to_usermode + +1: + /* kernel thread */ + UNWIND_HINT_EMPTY + movq %r12, %rdi + CALL_NOSPEC rbx + /* + * A kernel thread is allowed to return here after successfully + * calling kernel_execve(). Exit to userspace to complete the execve() + * syscall. + */ + movq $0, RAX(%rsp) + jmp 2b +SYM_CODE_END(ret_from_fork) +.popsection + +.macro DEBUG_ENTRY_ASSERT_IRQS_OFF +#ifdef CONFIG_DEBUG_ENTRY + pushq %rax + SAVE_FLAGS + testl $X86_EFLAGS_IF, %eax + jz .Lokay_\@ + ud2 +.Lokay_\@: + popq %rax +#endif +.endm + +SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + UNTRAIN_RET + RET +SYM_CODE_END(xen_error_entry) + +/** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + */ +.macro idtentry_body cfunc has_error_code:req + + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "call xen_error_entry", X86_FEATURE_XENPV + + ENCODE_FRAME_POINTER + UNWIND_HINT_REGS + + movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ + + .if \has_error_code == 1 + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .endif + + call \cfunc + + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + + jmp error_return +.endm + +/** + * idtentry - Macro to generate entry stubs for simple IDT entries + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + * + * The macro emits code to set up the kernel context for straight forward + * and simple IDT entries. No IST stack, no paranoid entry checks. + */ +.macro idtentry vector asmsym cfunc has_error_code:req +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR + ASM_CLAC + cld + + .if \has_error_code == 0 + pushq $-1 /* ORIG_RAX: no syscall to restart */ + .endif + + .if \vector == X86_TRAP_BP + /* + * If coming from kernel space, create a 6-word gap to allow the + * int3 handler to emulate a call instruction. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + + idtentry_body \cfunc \has_error_code + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + +/* + * Interrupt entry/exit. + * + + The interrupt stubs push (vector) onto the stack, which is the error_code + * position of idtentry exceptions, and jump to one of the two idtentry points + * (common/spurious). + * + * common_interrupt is a hotpath, align it to a cache line + */ +.macro idtentry_irq vector cfunc + .p2align CONFIG_X86_L1_CACHE_SHIFT + idtentry \vector asm_\cfunc \cfunc has_error_code=1 +.endm + +/* + * System vectors which invoke their handlers directly and are not + * going through the regular common device interrupt handling code. + */ +.macro idtentry_sysvec vector cfunc + idtentry \vector asm_\cfunc \cfunc has_error_code=0 +.endm + +/** + * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * + * The macro emits code to set up the kernel context for #MC and #DB + * + * If the entry comes from user space it uses the normal entry path + * including the return to user space work and preemption checks on + * exit. + * + * If hits in kernel mode then it needs to go through the paranoid + * entry as the exception can hit any random state. No preemption + * check on exit to keep the paranoid path simple. + */ +.macro idtentry_mce_db vector asmsym cfunc +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS + ENDBR + ASM_CLAC + cld + + pushq $-1 /* ORIG_RAX: no syscall to restart */ + + /* + * If the entry is from userspace, switch stacks and treat it as + * a normal entry. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_switch_stack_\@ + + /* paranoid_entry returns GS information for paranoid_exit in EBX. */ + call paranoid_entry + + UNWIND_HINT_REGS + + movq %rsp, %rdi /* pt_regs pointer */ + + call \cfunc + + jmp paranoid_exit + + /* Switch to the regular task stack and use the noist entry point */ +.Lfrom_usermode_switch_stack_\@: + idtentry_body noist_\cfunc, has_error_code=0 + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + +#ifdef CONFIG_AMD_MEM_ENCRYPT +/** + * idtentry_vc - Macro to generate entry stub for #VC + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * + * The macro emits code to set up the kernel context for #VC. The #VC handler + * runs on an IST stack and needs to be able to cause nested #VC exceptions. + * + * To make this work the #VC entry code tries its best to pretend it doesn't use + * an IST stack by switching to the task stack if coming from user-space (which + * includes early SYSCALL entry path) or back to the stack in the IRET frame if + * entered from kernel-mode. + * + * If entered from kernel-mode the return stack is validated first, and if it is + * not safe to use (e.g. because it points to the entry stack) the #VC handler + * will switch to a fall-back stack (VC2) and call a special handler function. + * + * The macro is only used for one vector, but it is planned to be extended in + * the future for the #HV exception. + */ +.macro idtentry_vc vector asmsym cfunc +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS + ENDBR + ASM_CLAC + cld + + /* + * If the entry is from userspace, switch stacks and treat it as + * a normal entry. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_switch_stack_\@ + + /* + * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. + * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS + */ + call paranoid_entry + + UNWIND_HINT_REGS + + /* + * Switch off the IST stack to make it free for nested exceptions. The + * vc_switch_off_ist() function will switch back to the interrupted + * stack if it is safe to do so. If not it switches to the VC fall-back + * stack. + */ + movq %rsp, %rdi /* pt_regs pointer */ + call vc_switch_off_ist + movq %rax, %rsp /* Switch to new stack */ + + ENCODE_FRAME_POINTER + UNWIND_HINT_REGS + + /* Update pt_regs */ + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + + movq %rsp, %rdi /* pt_regs pointer */ + + call kernel_\cfunc + + /* + * No need to switch back to the IST stack. The current stack is either + * identical to the stack in the IRET frame or the VC fall-back stack, + * so it is definitely mapped even with PTI enabled. + */ + jmp paranoid_exit + + /* Switch to the regular task stack */ +.Lfrom_usermode_switch_stack_\@: + idtentry_body user_\cfunc, has_error_code=1 + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm +#endif + +/* + * Double fault entry. Straight paranoid. No checks from which context + * this comes because for the espfix induced #DF this would do the wrong + * thing. + */ +.macro idtentry_df vector asmsym cfunc +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS offset=8 + ENDBR + ASM_CLAC + cld + + /* paranoid_entry returns GS information for paranoid_exit in EBX. */ + call paranoid_entry + UNWIND_HINT_REGS + + movq %rsp, %rdi /* pt_regs pointer into first argument */ + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + call \cfunc + + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + + jmp paranoid_exit + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + +/* + * Include the defines which emit the idt entries which are shared + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. + */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + +#include + + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + ANNOTATE_NOENDBR + +SYM_CODE_START_LOCAL(common_interrupt_return) +SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT +#ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) + jnz 1f + ud2 +1: +#endif +#ifdef CONFIG_XEN_PV + ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV +#endif + + POP_REGS pop_rdi=0 + + /* + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ + pushq 5*8(%rdi) /* RSP */ + pushq 4*8(%rdi) /* EFLAGS */ + pushq 3*8(%rdi) /* CS */ + pushq 2*8(%rdi) /* RIP */ + + /* Push user RDI on the trampoline stack. */ + pushq (%rdi) + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + STACKLEAK_ERASE_NOCLOBBER + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + + /* Restore RDI. */ + popq %rdi + swapgs + jmp .Lnative_iret + + +SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) +#ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates kernel mode. */ + testb $3, CS(%rsp) + jz 1f + ud2 +1: +#endif + POP_REGS + addq $8, %rsp /* skip regs->orig_ax */ + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler. + */ +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif + +.Lnative_iret: + UNWIND_HINT_IRET_REGS + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ +#ifdef CONFIG_X86_ESPFIX64 + testb $4, (SS-RIP)(%rsp) + jnz native_irq_return_ldt +#endif + +SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault + /* + * This may fault. Non-paranoid faults on return to userspace are + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. + * Double-faults due to espfix64 are handled in exc_double_fault. + * Other faults here are fatal. + */ + iretq + +#ifdef CONFIG_X86_ESPFIX64 +native_irq_return_ldt: + /* + * We are running with user GSBASE. All GPRs contain their user + * values. We have a percpu ESPFIX stack that is eight slots + * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom + * of the ESPFIX stack. + * + * We clobber RAX and RDI in this code. We stash RDI on the + * normal stack and RAX on the ESPFIX stack. + * + * The ESPFIX stack layout we set up looks like this: + * + * --- top of ESPFIX stack --- + * SS + * RSP + * RFLAGS + * CS + * RIP <-- RSP points here when we're done + * RAX <-- espfix_waddr points here + * --- bottom of ESPFIX stack --- + */ + + pushq %rdi /* Stash user RDI */ + swapgs /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ + movq (1*8)(%rsp), %rax /* user RIP */ + movq %rax, (1*8)(%rdi) + movq (2*8)(%rsp), %rax /* user CS */ + movq %rax, (2*8)(%rdi) + movq (3*8)(%rsp), %rax /* user RFLAGS */ + movq %rax, (3*8)(%rdi) + movq (5*8)(%rsp), %rax /* user SS */ + movq %rax, (5*8)(%rdi) + movq (4*8)(%rsp), %rax /* user RSP */ + movq %rax, (4*8)(%rdi) + /* Now RAX == RSP. */ + + andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ + + /* + * espfix_stack[31:16] == 0. The page tables are set up such that + * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of + * espfix_waddr for any X. That is, there are 65536 RO aliases of + * the same page. Set up RSP so that RSP[31:16] contains the + * respective 16 bits of the /userspace/ RSP and RSP nonetheless + * still points to an RO alias of the ESPFIX stack. + */ + orq PER_CPU_VAR(espfix_stack), %rax + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + swapgs /* to user GS */ + popq %rdi /* Restore user RDI */ + + movq %rax, %rsp + UNWIND_HINT_IRET_REGS offset=8 + + /* + * At this point, we cannot write to the stack any more, but we can + * still read. + */ + popq %rax /* Restore user RAX */ + + /* + * RSP now points to an ordinary IRET frame, except that the page + * is read-only and RSP[31:16] are preloaded with the userspace + * values. We can now IRET back to userspace. + */ + jmp native_irq_return_iret +#endif +SYM_CODE_END(common_interrupt_return) +_ASM_NOKPROBE(common_interrupt_return) + +/* + * Reload gs selector with exception handling + * edi: new selector + * + * Is in entry.text as it shouldn't be instrumented. + */ +SYM_FUNC_START(asm_load_gs_index) + FRAME_BEGIN + swapgs +.Lgs_change: + ANNOTATE_NOENDBR // error_entry + movl %edi, %gs +2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE + swapgs + FRAME_END + RET + + /* running with kernelgs */ +.Lbad_gs: + swapgs /* switch back to user gs */ +.macro ZAP_GS + /* This can't be a string because the preprocessor needs to see it. */ + movl $__USER_DS, %eax + movl %eax, %gs +.endm + ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG + xorl %eax, %eax + movl %eax, %gs + jmp 2b + + _ASM_EXTABLE(.Lgs_change, .Lbad_gs) + +SYM_FUNC_END(asm_load_gs_index) +EXPORT_SYMBOL(asm_load_gs_index) + +#ifdef CONFIG_XEN_PV +/* + * A note on the "critical region" in our callback handler. + * We want to avoid stacking callback handlers due to events occurring + * during handling of the last event. To do this, we keep events disabled + * until we've done all processing. HOWEVER, we must enable events before + * popping the stack frame (can't be done atomically) and so it would still + * be possible to get enough handler activations to overflow the stack. + * Although unlikely, bugs of that kind are hard to track down, so we'd + * like to avoid the possibility. + * So, on entry to the handler we detect whether we interrupted an + * existing activation in its critical region -- if so, we pop the current + * activation and restart the handler using the previous one. + * + * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) + */ +SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) + +/* + * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will + * see the correct pointer to the pt_regs + */ + UNWIND_HINT_FUNC + movq %rdi, %rsp /* we don't return, adjust the stack frame */ + UNWIND_HINT_REGS + + call xen_pv_evtchn_do_upcall + + jmp error_return +SYM_CODE_END(exc_xen_hypervisor_callback) + +/* + * Hypervisor uses this for application faults while it executes. + * We get here for two reasons: + * 1. Fault while reloading DS, ES, FS or GS + * 2. Fault while executing IRET + * Category 1 we do not need to fix up as Xen has already reloaded all segment + * registers that could be reloaded and zeroed the others. + * Category 2 we fix up by killing the current process. We cannot use the + * normal Linux return path in this case because if we use the IRET hypercall + * to pop the stack frame we end up in an infinite loop of failsafe callbacks. + * We distinguish between categories by comparing each saved segment register + * with its current contents: any discrepancy means we in category 1. + */ +SYM_CODE_START(xen_failsafe_callback) + UNWIND_HINT_EMPTY + ENDBR + movl %ds, %ecx + cmpw %cx, 0x10(%rsp) + jne 1f + movl %es, %ecx + cmpw %cx, 0x18(%rsp) + jne 1f + movl %fs, %ecx + cmpw %cx, 0x20(%rsp) + jne 1f + movl %gs, %ecx + cmpw %cx, 0x28(%rsp) + jne 1f + /* All segments match their saved values => Category 2 (Bad IRET). */ + movq (%rsp), %rcx + movq 8(%rsp), %r11 + addq $0x30, %rsp + pushq $0 /* RIP */ + UNWIND_HINT_IRET_REGS offset=8 + jmp asm_exc_general_protection +1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ + movq (%rsp), %rcx + movq 8(%rsp), %r11 + addq $0x30, %rsp + UNWIND_HINT_IRET_REGS + pushq $-1 /* orig_ax = -1 => not a system call */ + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER + jmp error_return +SYM_CODE_END(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ + +/* + * Save all registers in pt_regs. Return GSBASE related information + * in EBX depending on the availability of the FSGSBASE instructions: + * + * FSGSBASE R/EBX + * N 0 -> SWAPGS on exit + * 1 -> no SWAPGS on exit + * + * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL + */ +SYM_CODE_START_LOCAL(paranoid_entry) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + * + * Switching CR3 does not depend on kernel GSBASE so it can + * be done before switching to the kernel GSBASE. This is + * required for FSGSBASE because the kernel GSBASE has to + * be retrieved from a kernel internal table. + */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + + /* + * Handling GSBASE depends on the availability of FSGSBASE. + * + * Without FSGSBASE the kernel enforces that negative GSBASE + * values indicate kernel GSBASE. With FSGSBASE no assumptions + * can be made about the GSBASE value when entering from user + * space. + */ + ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE + + /* + * Read the current GSBASE and store it in %rbx unconditionally, + * retrieve and set the current CPUs kernel GSBASE. The stored value + * has to be restored in paranoid_exit unconditionally. + * + * The unconditional write to GS base below ensures that no subsequent + * loads based on a mispredicted GS base can happen, therefore no LFENCE + * is needed here. + */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx + jmp .Lparanoid_gsbase_done + +.Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ + movl $1, %ebx + + /* + * The kernel-enforced convention is a negative GSBASE indicates + * a kernel value. No SWAPGS needed on entry and exit. + */ + movl $MSR_GS_BASE, %ecx + rdmsr + testl %edx, %edx + js .Lparanoid_kernel_gsbase + + /* EBX = 0 -> SWAPGS required on exit */ + xorl %ebx, %ebx + swapgs +.Lparanoid_kernel_gsbase: + FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + + RET +SYM_CODE_END(paranoid_entry) + +/* + * "Paranoid" exit path from exception stack. This is invoked + * only on return from non-NMI IST interrupts that came + * from kernel space. + * + * We may be returning to very strange contexts (e.g. very early + * in syscall entry), so checking for preemption here would + * be complicated. Fortunately, there's no good reason to try + * to handle preemption here. + * + * R/EBX contains the GSBASE related information depending on the + * availability of the FSGSBASE instructions: + * + * FSGSBASE R/EBX + * N 0 -> SWAPGS on exit + * 1 -> no SWAPGS on exit + * + * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL + */ +SYM_CODE_START_LOCAL(paranoid_exit) + UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + + /* + * The order of operations is important. RESTORE_CR3 requires + * kernel GSBASE. + * + * NB to anyone to try to optimize this code: this code does + * not execute at all for exceptions from user mode. Those + * exceptions go through error_exit instead. + */ + RESTORE_CR3 scratch_reg=%rax save_reg=%r14 + + /* Handle the three GSBASE cases */ + ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE + + /* With FSGSBASE enabled, unconditionally restore GSBASE */ + wrgsbase %rbx + jmp restore_regs_and_return_to_kernel + +.Lparanoid_exit_checkgs: + /* On non-FSGSBASE systems, conditionally do SWAPGS */ + testl %ebx, %ebx + jnz restore_regs_and_return_to_kernel + + /* We are returning to a context with user GSBASE */ + swapgs + jmp restore_regs_and_return_to_kernel +SYM_CODE_END(paranoid_exit) + +/* + * Switch GS and CR3 if needed. + */ +SYM_CODE_START_LOCAL(error_entry) + UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace + + /* + * We entered from user mode or we're pretending to have entered + * from user mode due to an IRET fault. + */ + swapgs + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET + + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ +.Lerror_entry_from_usermode_after_swapgs: + + /* Put us onto the real thread stack. */ + call sync_regs + RET + + /* + * There are two places in the kernel that can potentially fault with + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. + */ +.Lerror_kernelspace: + leaq native_irq_return_iret(%rip), %rcx + cmpq %rcx, RIP+8(%rsp) + je .Lerror_bad_iret + movl %ecx, %eax /* zero extend */ + cmpq %rax, RIP+8(%rsp) + je .Lbstep_iret + cmpq $.Lgs_change, RIP+8(%rsp) + jne .Lerror_entry_done_lfence + + /* + * hack: .Lgs_change can fail with user gsbase. If this happens, fix up + * gsbase and proceed. We'll fix up the exception and land in + * .Lgs_change's error handler with kernel gsbase. + */ + swapgs + + /* + * Issue an LFENCE to prevent GS speculation, regardless of whether it is a + * kernel or user gsbase. + */ +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END + RET + +.Lbstep_iret: + /* Fix truncated RIP */ + movq %rcx, RIP+8(%rsp) + /* fall through */ + +.Lerror_bad_iret: + /* + * We came from an IRET to user mode, so we have user + * gsbase and CR3. Switch to kernel gsbase and CR3: + */ + swapgs + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET + + /* + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET. + */ + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + call fixup_bad_iret + mov %rax, %rdi + jmp .Lerror_entry_from_usermode_after_swapgs +SYM_CODE_END(error_entry) + +SYM_CODE_START_LOCAL(error_return) + UNWIND_HINT_REGS + DEBUG_ENTRY_ASSERT_IRQS_OFF + testb $3, CS(%rsp) + jz restore_regs_and_return_to_kernel + jmp swapgs_restore_regs_and_return_to_usermode +SYM_CODE_END(error_return) + +/* + * Runs on exception stack. Xen PV does not go through this path at all, + * so we can use real assembly here. + * + * Registers: + * %r14: Used to save/restore the CR3 of the interrupted context + * when PAGE_TABLE_ISOLATION is in use. Do not clobber. + */ +SYM_CODE_START(asm_exc_nmi) + UNWIND_HINT_IRET_REGS + ENDBR + + /* + * We allow breakpoints in NMIs. If a breakpoint occurs, then + * the iretq it performs will take us out of NMI context. + * This means that we can have nested NMIs where the next + * NMI is using the top of the stack of the previous NMI. We + * can't let it execute because the nested NMI will corrupt the + * stack of the previous NMI. NMI handlers are not re-entrant + * anyway. + * + * To handle this case we do the following: + * Check the a special location on the stack that contains + * a variable that is set when NMIs are executing. + * The interrupted task's stack is also checked to see if it + * is an NMI stack. + * If the variable is not set and the stack is not the NMI + * stack then: + * o Set the special variable on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack + * o Continue processing the NMI + * If the variable is set or the previous stack is the NMI stack: + * o Modify the "iret" location to jump to the repeat_nmi + * o return back to the first NMI + * + * Now on exit of the first NMI, we first clear the stack variable + * The NMI stack will tell any nested NMIs at that point that it is + * nested. Then we pop the stack normally with iret, and if there was + * a nested NMI that updated the copy interrupt stack frame, a + * jump will be made to the repeat_nmi code that will handle the second + * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. + */ + + ASM_CLAC + cld + + /* Use %rdx as our temp variable throughout */ + pushq %rdx + + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. + */ + + swapgs + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + UNWIND_HINT_IRET_REGS + pushq $-1 /* pt_regs->orig_ax */ + PUSH_AND_CLEAR_REGS rdx=(%rdx) + ENCODE_FRAME_POINTER + + IBRS_ENTER + UNTRAIN_RET + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + + movq %rsp, %rdi + movq $-1, %rsi + call exc_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. + */ + jmp swapgs_restore_regs_and_return_to_usermode + +.Lnmi_from_kernel: + /* + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + + /* + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call exc_nmi() anyway, so we can just + * resume the outer NMI. + */ + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: + + /* + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. + */ + cmpl $1, -8(%rsp) + je nested_nmi + + /* + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". + */ + lea 6*8(%rsp), %rdx + /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ + cmpq %rdx, 4*8(%rsp) + /* If the stack pointer is above the NMI stack, this is a normal NMI */ + ja first_nmi + + subq $EXCEPTION_STKSZ, %rdx + cmpq %rdx, 4*8(%rsp) + /* If it is below the NMI stack, it is a normal NMI */ + jb first_nmi + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ + +nested_nmi: + /* + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. + */ + subq $8, %rsp + leaq -10*8(%rsp), %rdx + pushq $__KERNEL_DS + pushq %rdx + pushfq + pushq $__KERNEL_CS + pushq $repeat_nmi + + /* Put stack back */ + addq $(6*8), %rsp + +nested_nmi_out: + popq %rdx + + /* We are returning to kernel mode, so this cannot result in a fault. */ + iretq + +first_nmi: + /* Restore rdx. */ + movq (%rsp), %rdx + + /* Make room for "NMI executing". */ + pushq $0 + + /* Leave room for the "iret" frame */ + subq $(5*8), %rsp + + /* Copy the "original" frame to the "outermost" frame */ + .rept 5 + pushq 11*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS + + /* Everything up to here is safe from nested NMIs */ + +#ifdef CONFIG_DEBUG_ENTRY + /* + * For ease of testing, unmask NMIs right away. Disabled by + * default because IRET is very expensive. + */ + pushq $0 /* SS */ + pushq %rsp /* RSP (minus 8 because of the previous push) */ + addq $8, (%rsp) /* Fix up RSP */ + pushfq /* RFLAGS */ + pushq $__KERNEL_CS /* CS */ + pushq $1f /* RIP */ + iretq /* continues at repeat_nmi below */ + UNWIND_HINT_IRET_REGS +1: +#endif + +repeat_nmi: + ANNOTATE_NOENDBR // this code + /* + * If there was a nested NMI, the first NMI's iret will return + * here. But NMIs are still enabled and we can take another + * nested NMI. The nested NMI checks the interrupted RIP to see + * if it is between repeat_nmi and end_repeat_nmi, and if so + * it will just return, as we are about to repeat an NMI anyway. + * This makes it safe to copy to the stack frame that a nested + * NMI will update. + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call exc_nmi(). "NMI executing" + * is zero. + */ + movq $1, 10*8(%rsp) /* Set "NMI executing". */ + + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ + addq $(10*8), %rsp + .rept 5 + pushq -6*8(%rsp) + .endr + subq $(5*8), %rsp +end_repeat_nmi: + ANNOTATE_NOENDBR // this code + + /* + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. + */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ + + /* + * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit + * as we should not be calling schedule in NMI context. + * Even with normal interrupts enabled. An NMI should not be + * setting NEED_RESCHED or anything that normal interrupts and + * exceptions might do. + */ + call paranoid_entry + UNWIND_HINT_REGS + + movq %rsp, %rdi + movq $-1, %rsi + call exc_nmi + + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + + /* + * The above invocation of paranoid_entry stored the GSBASE + * related information in R/EBX depending on the availability + * of FSGSBASE. + * + * If FSGSBASE is enabled, restore the saved GSBASE value + * unconditionally, otherwise take the conditional SWAPGS path. + */ + ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE + + wrgsbase %rbx + jmp nmi_restore + +nmi_no_fsgsbase: + /* EBX == 0 -> invoke SWAPGS */ + testl %ebx, %ebx + jnz nmi_restore + +nmi_swapgs: + swapgs + +nmi_restore: + POP_REGS + + /* + * Skip orig_ax and the "outermost" frame to point RSP at the "iret" + * at the "iret" frame. + */ + addq $6*8, %rsp + + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. + * + * We arguably should just inspect RIP instead, but I (Andy) wrote + * this code when I had the misapprehension that Xen PV supported + * NMIs, and Xen PV would break that approach. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * iretq reads the "iret" frame and exits the NMI stack in a + * single instruction. We are returning to kernel mode, so this + * cannot result in a fault. Similarly, we don't need to worry + * about espfix64 on the way back to kernel mode. + */ + iretq +SYM_CODE_END(asm_exc_nmi) + +#ifndef CONFIG_IA32_EMULATION +/* + * This handles SYSCALL from 32-bit code. There is no way to program + * MSRs to fully disable 32-bit SYSCALL. + */ +SYM_CODE_START(ignore_sysret) + UNWIND_HINT_EMPTY + ENDBR + mov $-ENOSYS, %eax + sysretl +SYM_CODE_END(ignore_sysret) +#endif + +.pushsection .text, "ax" +SYM_CODE_START(rewind_stack_and_make_dead) + UNWIND_HINT_FUNC + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + leaq -PTREGS_SIZE(%rax), %rsp + UNWIND_HINT_REGS + + call make_task_dead +SYM_CODE_END(rewind_stack_and_make_dead) +.popsection diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S new file mode 100644 index 000000000..d6c08d898 --- /dev/null +++ b/arch/x86/entry/entry_64_compat.S @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Compatibility mode system call entry point for x86-64. + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "calling.h" + + .section .entry.text, "ax" + +/* + * 32-bit SYSENTER entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on Intel CPUs. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). + * SYSENTER does not save anything on the stack, + * and does not save old RIP (!!!), RSP, or RFLAGS. + * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp user stack + * 0(%ebp) arg6 + */ +SYM_CODE_START(entry_SYSENTER_compat) + UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. */ + swapgs + + pushq %rax + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + popq %rax + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq $0 /* pt_regs->sp = 0 (placeholder) */ + + /* + * Push flags. This is nasty. First, interrupts are currently + * off, but we need pt_regs->flags to have IF set. Second, if TS + * was set in usermode, it's still set, and we're singlestepping + * through this code. do_SYSENTER_32() will fix up IF. + */ + pushfq /* pt_regs->flags (except IF = 0) */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ +SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit + * syscall. Just in case the high bits are nonzero, zero-extend + * the syscall number. (This could almost certainly be deleted + * with no ill effects.) + */ + movl %eax, %eax + + pushq %rax /* pt_regs->orig_ax */ + PUSH_AND_CLEAR_REGS rax=$-ENOSYS + UNWIND_HINT_REGS + + cld + + IBRS_ENTER + UNTRAIN_RET + + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC + * ourselves. To save a few cycles, we can check whether + * either was set instead of doing an unconditional popfq. + * This needs to happen before enabling interrupts so that + * we don't get preempted with NT set. + * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * + * NB.: .Lsysenter_fix_flags is a label with the code under it moved + * out-of-line as an optimization: NT is unlikely to be set in the + * majority of the cases and instead of polluting the I$ unnecessarily, + * we're keeping that code behind a branch which will predict as + * not-taken and therefore its instructions won't be fetched. + */ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + movq %rsp, %rdi + call do_SYSENTER_32 + /* XEN PV guests always use IRET path */ + ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ + "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV + jmp sysret32_from_system_call + +.Lsysenter_fix_flags: + pushq $X86_EFLAGS_FIXED + popfq + jmp .Lsysenter_flags_fixed +SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep +SYM_CODE_END(entry_SYSENTER_compat) + +/* + * 32-bit SYSCALL entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on AMD CPUs. + * + * The SYSCALL instruction, in principle, should *only* occur in the + * vDSO. In practice, it appears that this really is the case. + * As evidence: + * + * - The calling convention for SYSCALL has changed several times without + * anyone noticing. + * + * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything + * user task that did SYSCALL without immediately reloading SS + * would randomly crash. + * + * - Most programmers do not directly target AMD CPUs, and the 32-bit + * SYSCALL instruction does not exist on Intel CPUs. Even on AMD + * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels + * because the SYSCALL instruction in legacy/native 32-bit mode (as + * opposed to compat mode) is sufficiently poorly designed as to be + * essentially unusable. + * + * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves + * RFLAGS to R11, then loads new SS, CS, and RIP from previously + * programmed MSRs. RFLAGS gets masked by a value from another MSR + * (so CLD and CLAC are not needed). SYSCALL does not save anything on + * the stack and does not change RSP. + * + * Note: RFLAGS saving+masking-with-MSR happens only in Long mode + * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). + * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit + * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes + * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). + * + * Arguments: + * eax system call number + * ecx return address + * ebx arg1 + * ebp arg2 (note: not saved in the stack frame, should not be touched) + * edx arg3 + * esi arg4 + * edi arg5 + * esp user stack + * 0(%esp) arg6 + */ +SYM_CODE_START(entry_SYSCALL_compat) + UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. */ + swapgs + + /* Stash user ESP */ + movl %esp, %r8d + + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Switch to the kernel stack */ + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + +SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq %r8 /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ +SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + movl %eax, %eax /* discard orig_ax high bits */ + pushq %rax /* pt_regs->orig_ax */ + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS + UNWIND_HINT_REGS + + IBRS_ENTER + UNTRAIN_RET + + movq %rsp, %rdi + call do_fast_syscall_32 + /* XEN PV guests always use IRET path */ + ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ + "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV + + /* Opportunistic SYSRET */ +sysret32_from_system_call: + /* + * We are not going to return to userspace from the trampoline + * stack. So let's erase the thread stack right now. + */ + STACKLEAK_ERASE + + IBRS_EXIT + + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ + movq RIP(%rsp), %rcx /* pt_regs->ip (in rcx) */ + addq $RAX, %rsp /* Skip r8-r15 */ + popq %rax /* pt_regs->rax */ + popq %rdx /* Skip pt_regs->cx */ + popq %rdx /* pt_regs->dx */ + popq %rsi /* pt_regs->si */ + popq %rdi /* pt_regs->di */ + + /* + * USERGS_SYSRET32 does: + * GSBASE = user's GS base + * EIP = ECX + * RFLAGS = R11 + * CS = __USER32_CS + * SS = __USER_DS + * + * ECX will not match pt_regs->cx, but we're returning to a vDSO + * trampoline that will fix up RCX, so this is okay. + * + * R12-R15 are callee-saved, so they contain whatever was in them + * when the system call started, which is already known to user + * code. We zero R8-R10 to avoid info leaks. + */ + movq RSP-ORIG_RAX(%rsp), %rsp +SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + + /* + * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored + * on the process stack which is not mapped to userspace and + * not readable after we SWITCH_TO_USER_CR3. Delay the CR3 + * switch until after after the last reference to the process + * stack. + * + * %r8/%r9 are zeroed before the sysret, thus safe to clobber. + */ + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 + + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + swapgs + sysretl +SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 +SYM_CODE_END(entry_SYSCALL_compat) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c new file mode 100644 index 000000000..8cfc9bc73 --- /dev/null +++ b/arch/x86/entry/syscall_32.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* System call table for i386. */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_IA32_EMULATION +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) +#else +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) +#endif + +#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *); + +#include +#undef __SYSCALL + +#define __SYSCALL(nr, sym) __ia32_##sym, + +__visible const sys_call_ptr_t ia32_sys_call_table[] = { +#include +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c new file mode 100644 index 000000000..be120eec1 --- /dev/null +++ b/arch/x86/entry/syscall_64.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* System call table for x86-64. */ + +#include +#include +#include +#include +#include + +#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); +#include +#undef __SYSCALL + +#define __SYSCALL(nr, sym) __x64_##sym, + +asmlinkage const sys_call_ptr_t sys_call_table[] = { +#include +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c new file mode 100644 index 000000000..bdd0e03a1 --- /dev/null +++ b/arch/x86/entry/syscall_x32.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* System call table for x32 ABI. */ + +#include +#include +#include +#include +#include + +#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); +#include +#undef __SYSCALL + +#define __SYSCALL(nr, sym) __x64_##sym, + +asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { +#include +}; diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile new file mode 100644 index 000000000..eca5d6eff --- /dev/null +++ b/arch/x86/entry/syscalls/Makefile @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0 +out := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm + +# Create output directory if not already present +$(shell mkdir -p $(out) $(uapi)) + +syscall32 := $(src)/syscall_32.tbl +syscall64 := $(src)/syscall_64.tbl + +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh +offset := +prefix := + +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \ + $(if $(offset),--offset $(offset)) \ + $(if $(prefix),--prefix $(prefix)) \ + $< $@ +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@ + +quiet_cmd_hypercalls = HYPERCALLS $@ + cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs)) + +$(uapi)/unistd_32.h: abis := i386 +$(uapi)/unistd_32.h: $(syscall32) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(out)/unistd_32_ia32.h: abis := i386 +$(out)/unistd_32_ia32.h: prefix := ia32_ +$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(uapi)/unistd_x32.h: abis := common,x32 +$(uapi)/unistd_x32.h: offset := __X32_SYSCALL_BIT +$(uapi)/unistd_x32.h: $(syscall64) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(uapi)/unistd_64.h: abis := common,64 +$(uapi)/unistd_64.h: $(syscall64) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(out)/unistd_64_x32.h: abis := x32 +$(out)/unistd_64_x32.h: prefix := x32_ +$(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(out)/syscalls_32.h: abis := i386 +$(out)/syscalls_32.h: $(syscall32) $(systbl) FORCE + $(call if_changed,systbl) +$(out)/syscalls_64.h: abis := common,64 +$(out)/syscalls_64.h: $(syscall64) $(systbl) FORCE + $(call if_changed,systbl) +$(out)/syscalls_x32.h: abis := common,x32 +$(out)/syscalls_x32.h: $(syscall64) $(systbl) FORCE + $(call if_changed,systbl) + +$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh FORCE + $(call if_changed,hypercalls) + +$(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h + +uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h +syshdr-y += syscalls_32.h +syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h +syshdr-$(CONFIG_X86_64) += syscalls_64.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h +syshdr-$(CONFIG_XEN) += xen-hypercalls.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +syshdr-y := $(addprefix $(out)/, $(syshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(syshdr-y)) + +PHONY += all +all: $(uapisyshdr-y) $(syshdr-y) + @: diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl new file mode 100644 index 000000000..320480a8d --- /dev/null +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -0,0 +1,457 @@ +# +# 32-bit system call numbers and entry vectors +# +# The format is: +# +# +# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for +# sys_*() system calls and compat_sys_*() compat system calls if +# IA32_EMULATION is defined, and expect struct pt_regs *regs as their only +# parameter. +# +# The abi is always "i386" for this file. +# +0 i386 restart_syscall sys_restart_syscall +1 i386 exit sys_exit +2 i386 fork sys_fork +3 i386 read sys_read +4 i386 write sys_write +5 i386 open sys_open compat_sys_open +6 i386 close sys_close +7 i386 waitpid sys_waitpid +8 i386 creat sys_creat +9 i386 link sys_link +10 i386 unlink sys_unlink +11 i386 execve sys_execve compat_sys_execve +12 i386 chdir sys_chdir +13 i386 time sys_time32 +14 i386 mknod sys_mknod +15 i386 chmod sys_chmod +16 i386 lchown sys_lchown16 +17 i386 break +18 i386 oldstat sys_stat +19 i386 lseek sys_lseek compat_sys_lseek +20 i386 getpid sys_getpid +21 i386 mount sys_mount +22 i386 umount sys_oldumount +23 i386 setuid sys_setuid16 +24 i386 getuid sys_getuid16 +25 i386 stime sys_stime32 +26 i386 ptrace sys_ptrace compat_sys_ptrace +27 i386 alarm sys_alarm +28 i386 oldfstat sys_fstat +29 i386 pause sys_pause +30 i386 utime sys_utime32 +31 i386 stty +32 i386 gtty +33 i386 access sys_access +34 i386 nice sys_nice +35 i386 ftime +36 i386 sync sys_sync +37 i386 kill sys_kill +38 i386 rename sys_rename +39 i386 mkdir sys_mkdir +40 i386 rmdir sys_rmdir +41 i386 dup sys_dup +42 i386 pipe sys_pipe +43 i386 times sys_times compat_sys_times +44 i386 prof +45 i386 brk sys_brk +46 i386 setgid sys_setgid16 +47 i386 getgid sys_getgid16 +48 i386 signal sys_signal +49 i386 geteuid sys_geteuid16 +50 i386 getegid sys_getegid16 +51 i386 acct sys_acct +52 i386 umount2 sys_umount +53 i386 lock +54 i386 ioctl sys_ioctl compat_sys_ioctl +55 i386 fcntl sys_fcntl compat_sys_fcntl64 +56 i386 mpx +57 i386 setpgid sys_setpgid +58 i386 ulimit +59 i386 oldolduname sys_olduname +60 i386 umask sys_umask +61 i386 chroot sys_chroot +62 i386 ustat sys_ustat compat_sys_ustat +63 i386 dup2 sys_dup2 +64 i386 getppid sys_getppid +65 i386 getpgrp sys_getpgrp +66 i386 setsid sys_setsid +67 i386 sigaction sys_sigaction compat_sys_sigaction +68 i386 sgetmask sys_sgetmask +69 i386 ssetmask sys_ssetmask +70 i386 setreuid sys_setreuid16 +71 i386 setregid sys_setregid16 +72 i386 sigsuspend sys_sigsuspend +73 i386 sigpending sys_sigpending compat_sys_sigpending +74 i386 sethostname sys_sethostname +75 i386 setrlimit sys_setrlimit compat_sys_setrlimit +76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit +77 i386 getrusage sys_getrusage compat_sys_getrusage +78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday +79 i386 settimeofday sys_settimeofday compat_sys_settimeofday +80 i386 getgroups sys_getgroups16 +81 i386 setgroups sys_setgroups16 +82 i386 select sys_old_select compat_sys_old_select +83 i386 symlink sys_symlink +84 i386 oldlstat sys_lstat +85 i386 readlink sys_readlink +86 i386 uselib sys_uselib +87 i386 swapon sys_swapon +88 i386 reboot sys_reboot +89 i386 readdir sys_old_readdir compat_sys_old_readdir +90 i386 mmap sys_old_mmap compat_sys_ia32_mmap +91 i386 munmap sys_munmap +92 i386 truncate sys_truncate compat_sys_truncate +93 i386 ftruncate sys_ftruncate compat_sys_ftruncate +94 i386 fchmod sys_fchmod +95 i386 fchown sys_fchown16 +96 i386 getpriority sys_getpriority +97 i386 setpriority sys_setpriority +98 i386 profil +99 i386 statfs sys_statfs compat_sys_statfs +100 i386 fstatfs sys_fstatfs compat_sys_fstatfs +101 i386 ioperm sys_ioperm +102 i386 socketcall sys_socketcall compat_sys_socketcall +103 i386 syslog sys_syslog +104 i386 setitimer sys_setitimer compat_sys_setitimer +105 i386 getitimer sys_getitimer compat_sys_getitimer +106 i386 stat sys_newstat compat_sys_newstat +107 i386 lstat sys_newlstat compat_sys_newlstat +108 i386 fstat sys_newfstat compat_sys_newfstat +109 i386 olduname sys_uname +110 i386 iopl sys_iopl +111 i386 vhangup sys_vhangup +112 i386 idle +113 i386 vm86old sys_vm86old sys_ni_syscall +114 i386 wait4 sys_wait4 compat_sys_wait4 +115 i386 swapoff sys_swapoff +116 i386 sysinfo sys_sysinfo compat_sys_sysinfo +117 i386 ipc sys_ipc compat_sys_ipc +118 i386 fsync sys_fsync +119 i386 sigreturn sys_sigreturn compat_sys_sigreturn +120 i386 clone sys_clone compat_sys_ia32_clone +121 i386 setdomainname sys_setdomainname +122 i386 uname sys_newuname +123 i386 modify_ldt sys_modify_ldt +124 i386 adjtimex sys_adjtimex_time32 +125 i386 mprotect sys_mprotect +126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask +127 i386 create_module +128 i386 init_module sys_init_module +129 i386 delete_module sys_delete_module +130 i386 get_kernel_syms +131 i386 quotactl sys_quotactl +132 i386 getpgid sys_getpgid +133 i386 fchdir sys_fchdir +134 i386 bdflush sys_ni_syscall +135 i386 sysfs sys_sysfs +136 i386 personality sys_personality +137 i386 afs_syscall +138 i386 setfsuid sys_setfsuid16 +139 i386 setfsgid sys_setfsgid16 +140 i386 _llseek sys_llseek +141 i386 getdents sys_getdents compat_sys_getdents +142 i386 _newselect sys_select compat_sys_select +143 i386 flock sys_flock +144 i386 msync sys_msync +145 i386 readv sys_readv +146 i386 writev sys_writev +147 i386 getsid sys_getsid +148 i386 fdatasync sys_fdatasync +149 i386 _sysctl sys_ni_syscall +150 i386 mlock sys_mlock +151 i386 munlock sys_munlock +152 i386 mlockall sys_mlockall +153 i386 munlockall sys_munlockall +154 i386 sched_setparam sys_sched_setparam +155 i386 sched_getparam sys_sched_getparam +156 i386 sched_setscheduler sys_sched_setscheduler +157 i386 sched_getscheduler sys_sched_getscheduler +158 i386 sched_yield sys_sched_yield +159 i386 sched_get_priority_max sys_sched_get_priority_max +160 i386 sched_get_priority_min sys_sched_get_priority_min +161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 i386 nanosleep sys_nanosleep_time32 +163 i386 mremap sys_mremap +164 i386 setresuid sys_setresuid16 +165 i386 getresuid sys_getresuid16 +166 i386 vm86 sys_vm86 sys_ni_syscall +167 i386 query_module +168 i386 poll sys_poll +169 i386 nfsservctl +170 i386 setresgid sys_setresgid16 +171 i386 getresgid sys_getresgid16 +172 i386 prctl sys_prctl +173 i386 rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn +174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction +175 i386 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask +176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending +177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32 +178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo +179 i386 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend +180 i386 pread64 sys_ia32_pread64 +181 i386 pwrite64 sys_ia32_pwrite64 +182 i386 chown sys_chown16 +183 i386 getcwd sys_getcwd +184 i386 capget sys_capget +185 i386 capset sys_capset +186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack +187 i386 sendfile sys_sendfile compat_sys_sendfile +188 i386 getpmsg +189 i386 putpmsg +190 i386 vfork sys_vfork +191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit +192 i386 mmap2 sys_mmap_pgoff +193 i386 truncate64 sys_ia32_truncate64 +194 i386 ftruncate64 sys_ia32_ftruncate64 +195 i386 stat64 sys_stat64 compat_sys_ia32_stat64 +196 i386 lstat64 sys_lstat64 compat_sys_ia32_lstat64 +197 i386 fstat64 sys_fstat64 compat_sys_ia32_fstat64 +198 i386 lchown32 sys_lchown +199 i386 getuid32 sys_getuid +200 i386 getgid32 sys_getgid +201 i386 geteuid32 sys_geteuid +202 i386 getegid32 sys_getegid +203 i386 setreuid32 sys_setreuid +204 i386 setregid32 sys_setregid +205 i386 getgroups32 sys_getgroups +206 i386 setgroups32 sys_setgroups +207 i386 fchown32 sys_fchown +208 i386 setresuid32 sys_setresuid +209 i386 getresuid32 sys_getresuid +210 i386 setresgid32 sys_setresgid +211 i386 getresgid32 sys_getresgid +212 i386 chown32 sys_chown +213 i386 setuid32 sys_setuid +214 i386 setgid32 sys_setgid +215 i386 setfsuid32 sys_setfsuid +216 i386 setfsgid32 sys_setfsgid +217 i386 pivot_root sys_pivot_root +218 i386 mincore sys_mincore +219 i386 madvise sys_madvise +220 i386 getdents64 sys_getdents64 +221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 +# 222 is unused +# 223 is unused +224 i386 gettid sys_gettid +225 i386 readahead sys_ia32_readahead +226 i386 setxattr sys_setxattr +227 i386 lsetxattr sys_lsetxattr +228 i386 fsetxattr sys_fsetxattr +229 i386 getxattr sys_getxattr +230 i386 lgetxattr sys_lgetxattr +231 i386 fgetxattr sys_fgetxattr +232 i386 listxattr sys_listxattr +233 i386 llistxattr sys_llistxattr +234 i386 flistxattr sys_flistxattr +235 i386 removexattr sys_removexattr +236 i386 lremovexattr sys_lremovexattr +237 i386 fremovexattr sys_fremovexattr +238 i386 tkill sys_tkill +239 i386 sendfile64 sys_sendfile64 +240 i386 futex sys_futex_time32 +241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity +242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity +243 i386 set_thread_area sys_set_thread_area +244 i386 get_thread_area sys_get_thread_area +245 i386 io_setup sys_io_setup compat_sys_io_setup +246 i386 io_destroy sys_io_destroy +247 i386 io_getevents sys_io_getevents_time32 +248 i386 io_submit sys_io_submit compat_sys_io_submit +249 i386 io_cancel sys_io_cancel +250 i386 fadvise64 sys_ia32_fadvise64 +# 251 is available for reuse (was briefly sys_set_zone_reclaim) +252 i386 exit_group sys_exit_group +253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +254 i386 epoll_create sys_epoll_create +255 i386 epoll_ctl sys_epoll_ctl +256 i386 epoll_wait sys_epoll_wait +257 i386 remap_file_pages sys_remap_file_pages +258 i386 set_tid_address sys_set_tid_address +259 i386 timer_create sys_timer_create compat_sys_timer_create +260 i386 timer_settime sys_timer_settime32 +261 i386 timer_gettime sys_timer_gettime32 +262 i386 timer_getoverrun sys_timer_getoverrun +263 i386 timer_delete sys_timer_delete +264 i386 clock_settime sys_clock_settime32 +265 i386 clock_gettime sys_clock_gettime32 +266 i386 clock_getres sys_clock_getres_time32 +267 i386 clock_nanosleep sys_clock_nanosleep_time32 +268 i386 statfs64 sys_statfs64 compat_sys_statfs64 +269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 +270 i386 tgkill sys_tgkill +271 i386 utimes sys_utimes_time32 +272 i386 fadvise64_64 sys_ia32_fadvise64_64 +273 i386 vserver +274 i386 mbind sys_mbind +275 i386 get_mempolicy sys_get_mempolicy +276 i386 set_mempolicy sys_set_mempolicy +277 i386 mq_open sys_mq_open compat_sys_mq_open +278 i386 mq_unlink sys_mq_unlink +279 i386 mq_timedsend sys_mq_timedsend_time32 +280 i386 mq_timedreceive sys_mq_timedreceive_time32 +281 i386 mq_notify sys_mq_notify compat_sys_mq_notify +282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr +283 i386 kexec_load sys_kexec_load compat_sys_kexec_load +284 i386 waitid sys_waitid compat_sys_waitid +# 285 sys_setaltroot +286 i386 add_key sys_add_key +287 i386 request_key sys_request_key +288 i386 keyctl sys_keyctl compat_sys_keyctl +289 i386 ioprio_set sys_ioprio_set +290 i386 ioprio_get sys_ioprio_get +291 i386 inotify_init sys_inotify_init +292 i386 inotify_add_watch sys_inotify_add_watch +293 i386 inotify_rm_watch sys_inotify_rm_watch +294 i386 migrate_pages sys_migrate_pages +295 i386 openat sys_openat compat_sys_openat +296 i386 mkdirat sys_mkdirat +297 i386 mknodat sys_mknodat +298 i386 fchownat sys_fchownat +299 i386 futimesat sys_futimesat_time32 +300 i386 fstatat64 sys_fstatat64 compat_sys_ia32_fstatat64 +301 i386 unlinkat sys_unlinkat +302 i386 renameat sys_renameat +303 i386 linkat sys_linkat +304 i386 symlinkat sys_symlinkat +305 i386 readlinkat sys_readlinkat +306 i386 fchmodat sys_fchmodat +307 i386 faccessat sys_faccessat +308 i386 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32 +309 i386 ppoll sys_ppoll_time32 compat_sys_ppoll_time32 +310 i386 unshare sys_unshare +311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list +312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list +313 i386 splice sys_splice +314 i386 sync_file_range sys_ia32_sync_file_range +315 i386 tee sys_tee +316 i386 vmsplice sys_vmsplice +317 i386 move_pages sys_move_pages +318 i386 getcpu sys_getcpu +319 i386 epoll_pwait sys_epoll_pwait +320 i386 utimensat sys_utimensat_time32 +321 i386 signalfd sys_signalfd compat_sys_signalfd +322 i386 timerfd_create sys_timerfd_create +323 i386 eventfd sys_eventfd +324 i386 fallocate sys_ia32_fallocate +325 i386 timerfd_settime sys_timerfd_settime32 +326 i386 timerfd_gettime sys_timerfd_gettime32 +327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 +328 i386 eventfd2 sys_eventfd2 +329 i386 epoll_create1 sys_epoll_create1 +330 i386 dup3 sys_dup3 +331 i386 pipe2 sys_pipe2 +332 i386 inotify_init1 sys_inotify_init1 +333 i386 preadv sys_preadv compat_sys_preadv +334 i386 pwritev sys_pwritev compat_sys_pwritev +335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +336 i386 perf_event_open sys_perf_event_open +337 i386 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32 +338 i386 fanotify_init sys_fanotify_init +339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark +340 i386 prlimit64 sys_prlimit64 +341 i386 name_to_handle_at sys_name_to_handle_at +342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at +343 i386 clock_adjtime sys_clock_adjtime32 +344 i386 syncfs sys_syncfs +345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg +346 i386 setns sys_setns +347 i386 process_vm_readv sys_process_vm_readv +348 i386 process_vm_writev sys_process_vm_writev +349 i386 kcmp sys_kcmp +350 i386 finit_module sys_finit_module +351 i386 sched_setattr sys_sched_setattr +352 i386 sched_getattr sys_sched_getattr +353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp +355 i386 getrandom sys_getrandom +356 i386 memfd_create sys_memfd_create +357 i386 bpf sys_bpf +358 i386 execveat sys_execveat compat_sys_execveat +359 i386 socket sys_socket +360 i386 socketpair sys_socketpair +361 i386 bind sys_bind +362 i386 connect sys_connect +363 i386 listen sys_listen +364 i386 accept4 sys_accept4 +365 i386 getsockopt sys_getsockopt sys_getsockopt +366 i386 setsockopt sys_setsockopt sys_setsockopt +367 i386 getsockname sys_getsockname +368 i386 getpeername sys_getpeername +369 i386 sendto sys_sendto +370 i386 sendmsg sys_sendmsg compat_sys_sendmsg +371 i386 recvfrom sys_recvfrom compat_sys_recvfrom +372 i386 recvmsg sys_recvmsg compat_sys_recvmsg +373 i386 shutdown sys_shutdown +374 i386 userfaultfd sys_userfaultfd +375 i386 membarrier sys_membarrier +376 i386 mlock2 sys_mlock2 +377 i386 copy_file_range sys_copy_file_range +378 i386 preadv2 sys_preadv2 compat_sys_preadv2 +379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 +380 i386 pkey_mprotect sys_pkey_mprotect +381 i386 pkey_alloc sys_pkey_alloc +382 i386 pkey_free sys_pkey_free +383 i386 statx sys_statx +384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents +386 i386 rseq sys_rseq +393 i386 semget sys_semget +394 i386 semctl sys_semctl compat_sys_semctl +395 i386 shmget sys_shmget +396 i386 shmctl sys_shmctl compat_sys_shmctl +397 i386 shmat sys_shmat compat_sys_shmat +398 i386 shmdt sys_shmdt +399 i386 msgget sys_msgget +400 i386 msgsnd sys_msgsnd compat_sys_msgsnd +401 i386 msgrcv sys_msgrcv compat_sys_msgrcv +402 i386 msgctl sys_msgctl compat_sys_msgctl +403 i386 clock_gettime64 sys_clock_gettime +404 i386 clock_settime64 sys_clock_settime +405 i386 clock_adjtime64 sys_clock_adjtime +406 i386 clock_getres_time64 sys_clock_getres +407 i386 clock_nanosleep_time64 sys_clock_nanosleep +408 i386 timer_gettime64 sys_timer_gettime +409 i386 timer_settime64 sys_timer_settime +410 i386 timerfd_gettime64 sys_timerfd_gettime +411 i386 timerfd_settime64 sys_timerfd_settime +412 i386 utimensat_time64 sys_utimensat +413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 +414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 +416 i386 io_pgetevents_time64 sys_io_pgetevents +417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 +418 i386 mq_timedsend_time64 sys_mq_timedsend +419 i386 mq_timedreceive_time64 sys_mq_timedreceive +420 i386 semtimedop_time64 sys_semtimedop +421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 +422 i386 futex_time64 sys_futex +423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 i386 pidfd_send_signal sys_pidfd_send_signal +425 i386 io_uring_setup sys_io_uring_setup +426 i386 io_uring_enter sys_io_uring_enter +427 i386 io_uring_register sys_io_uring_register +428 i386 open_tree sys_open_tree +429 i386 move_mount sys_move_mount +430 i386 fsopen sys_fsopen +431 i386 fsconfig sys_fsconfig +432 i386 fsmount sys_fsmount +433 i386 fspick sys_fspick +434 i386 pidfd_open sys_pidfd_open +435 i386 clone3 sys_clone3 +436 i386 close_range sys_close_range +437 i386 openat2 sys_openat2 +438 i386 pidfd_getfd sys_pidfd_getfd +439 i386 faccessat2 sys_faccessat2 +440 i386 process_madvise sys_process_madvise +441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 i386 mount_setattr sys_mount_setattr +443 i386 quotactl_fd sys_quotactl_fd +444 i386 landlock_create_ruleset sys_landlock_create_ruleset +445 i386 landlock_add_rule sys_landlock_add_rule +446 i386 landlock_restrict_self sys_landlock_restrict_self +447 i386 memfd_secret sys_memfd_secret +448 i386 process_mrelease sys_process_mrelease +449 i386 futex_waitv sys_futex_waitv +450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl new file mode 100644 index 000000000..c84d12608 --- /dev/null +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -0,0 +1,419 @@ +# +# 64-bit system call numbers and entry vectors +# +# The format is: +# +# +# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls +# +# The abi is "common", "64" or "x32" for this file. +# +0 common read sys_read +1 common write sys_write +2 common open sys_open +3 common close sys_close +4 common stat sys_newstat +5 common fstat sys_newfstat +6 common lstat sys_newlstat +7 common poll sys_poll +8 common lseek sys_lseek +9 common mmap sys_mmap +10 common mprotect sys_mprotect +11 common munmap sys_munmap +12 common brk sys_brk +13 64 rt_sigaction sys_rt_sigaction +14 common rt_sigprocmask sys_rt_sigprocmask +15 64 rt_sigreturn sys_rt_sigreturn +16 64 ioctl sys_ioctl +17 common pread64 sys_pread64 +18 common pwrite64 sys_pwrite64 +19 64 readv sys_readv +20 64 writev sys_writev +21 common access sys_access +22 common pipe sys_pipe +23 common select sys_select +24 common sched_yield sys_sched_yield +25 common mremap sys_mremap +26 common msync sys_msync +27 common mincore sys_mincore +28 common madvise sys_madvise +29 common shmget sys_shmget +30 common shmat sys_shmat +31 common shmctl sys_shmctl +32 common dup sys_dup +33 common dup2 sys_dup2 +34 common pause sys_pause +35 common nanosleep sys_nanosleep +36 common getitimer sys_getitimer +37 common alarm sys_alarm +38 common setitimer sys_setitimer +39 common getpid sys_getpid +40 common sendfile sys_sendfile64 +41 common socket sys_socket +42 common connect sys_connect +43 common accept sys_accept +44 common sendto sys_sendto +45 64 recvfrom sys_recvfrom +46 64 sendmsg sys_sendmsg +47 64 recvmsg sys_recvmsg +48 common shutdown sys_shutdown +49 common bind sys_bind +50 common listen sys_listen +51 common getsockname sys_getsockname +52 common getpeername sys_getpeername +53 common socketpair sys_socketpair +54 64 setsockopt sys_setsockopt +55 64 getsockopt sys_getsockopt +56 common clone sys_clone +57 common fork sys_fork +58 common vfork sys_vfork +59 64 execve sys_execve +60 common exit sys_exit +61 common wait4 sys_wait4 +62 common kill sys_kill +63 common uname sys_newuname +64 common semget sys_semget +65 common semop sys_semop +66 common semctl sys_semctl +67 common shmdt sys_shmdt +68 common msgget sys_msgget +69 common msgsnd sys_msgsnd +70 common msgrcv sys_msgrcv +71 common msgctl sys_msgctl +72 common fcntl sys_fcntl +73 common flock sys_flock +74 common fsync sys_fsync +75 common fdatasync sys_fdatasync +76 common truncate sys_truncate +77 common ftruncate sys_ftruncate +78 common getdents sys_getdents +79 common getcwd sys_getcwd +80 common chdir sys_chdir +81 common fchdir sys_fchdir +82 common rename sys_rename +83 common mkdir sys_mkdir +84 common rmdir sys_rmdir +85 common creat sys_creat +86 common link sys_link +87 common unlink sys_unlink +88 common symlink sys_symlink +89 common readlink sys_readlink +90 common chmod sys_chmod +91 common fchmod sys_fchmod +92 common chown sys_chown +93 common fchown sys_fchown +94 common lchown sys_lchown +95 common umask sys_umask +96 common gettimeofday sys_gettimeofday +97 common getrlimit sys_getrlimit +98 common getrusage sys_getrusage +99 common sysinfo sys_sysinfo +100 common times sys_times +101 64 ptrace sys_ptrace +102 common getuid sys_getuid +103 common syslog sys_syslog +104 common getgid sys_getgid +105 common setuid sys_setuid +106 common setgid sys_setgid +107 common geteuid sys_geteuid +108 common getegid sys_getegid +109 common setpgid sys_setpgid +110 common getppid sys_getppid +111 common getpgrp sys_getpgrp +112 common setsid sys_setsid +113 common setreuid sys_setreuid +114 common setregid sys_setregid +115 common getgroups sys_getgroups +116 common setgroups sys_setgroups +117 common setresuid sys_setresuid +118 common getresuid sys_getresuid +119 common setresgid sys_setresgid +120 common getresgid sys_getresgid +121 common getpgid sys_getpgid +122 common setfsuid sys_setfsuid +123 common setfsgid sys_setfsgid +124 common getsid sys_getsid +125 common capget sys_capget +126 common capset sys_capset +127 64 rt_sigpending sys_rt_sigpending +128 64 rt_sigtimedwait sys_rt_sigtimedwait +129 64 rt_sigqueueinfo sys_rt_sigqueueinfo +130 common rt_sigsuspend sys_rt_sigsuspend +131 64 sigaltstack sys_sigaltstack +132 common utime sys_utime +133 common mknod sys_mknod +134 64 uselib +135 common personality sys_personality +136 common ustat sys_ustat +137 common statfs sys_statfs +138 common fstatfs sys_fstatfs +139 common sysfs sys_sysfs +140 common getpriority sys_getpriority +141 common setpriority sys_setpriority +142 common sched_setparam sys_sched_setparam +143 common sched_getparam sys_sched_getparam +144 common sched_setscheduler sys_sched_setscheduler +145 common sched_getscheduler sys_sched_getscheduler +146 common sched_get_priority_max sys_sched_get_priority_max +147 common sched_get_priority_min sys_sched_get_priority_min +148 common sched_rr_get_interval sys_sched_rr_get_interval +149 common mlock sys_mlock +150 common munlock sys_munlock +151 common mlockall sys_mlockall +152 common munlockall sys_munlockall +153 common vhangup sys_vhangup +154 common modify_ldt sys_modify_ldt +155 common pivot_root sys_pivot_root +156 64 _sysctl sys_ni_syscall +157 common prctl sys_prctl +158 common arch_prctl sys_arch_prctl +159 common adjtimex sys_adjtimex +160 common setrlimit sys_setrlimit +161 common chroot sys_chroot +162 common sync sys_sync +163 common acct sys_acct +164 common settimeofday sys_settimeofday +165 common mount sys_mount +166 common umount2 sys_umount +167 common swapon sys_swapon +168 common swapoff sys_swapoff +169 common reboot sys_reboot +170 common sethostname sys_sethostname +171 common setdomainname sys_setdomainname +172 common iopl sys_iopl +173 common ioperm sys_ioperm +174 64 create_module +175 common init_module sys_init_module +176 common delete_module sys_delete_module +177 64 get_kernel_syms +178 64 query_module +179 common quotactl sys_quotactl +180 64 nfsservctl +181 common getpmsg +182 common putpmsg +183 common afs_syscall +184 common tuxcall +185 common security +186 common gettid sys_gettid +187 common readahead sys_readahead +188 common setxattr sys_setxattr +189 common lsetxattr sys_lsetxattr +190 common fsetxattr sys_fsetxattr +191 common getxattr sys_getxattr +192 common lgetxattr sys_lgetxattr +193 common fgetxattr sys_fgetxattr +194 common listxattr sys_listxattr +195 common llistxattr sys_llistxattr +196 common flistxattr sys_flistxattr +197 common removexattr sys_removexattr +198 common lremovexattr sys_lremovexattr +199 common fremovexattr sys_fremovexattr +200 common tkill sys_tkill +201 common time sys_time +202 common futex sys_futex +203 common sched_setaffinity sys_sched_setaffinity +204 common sched_getaffinity sys_sched_getaffinity +205 64 set_thread_area +206 64 io_setup sys_io_setup +207 common io_destroy sys_io_destroy +208 common io_getevents sys_io_getevents +209 64 io_submit sys_io_submit +210 common io_cancel sys_io_cancel +211 64 get_thread_area +212 common lookup_dcookie sys_lookup_dcookie +213 common epoll_create sys_epoll_create +214 64 epoll_ctl_old +215 64 epoll_wait_old +216 common remap_file_pages sys_remap_file_pages +217 common getdents64 sys_getdents64 +218 common set_tid_address sys_set_tid_address +219 common restart_syscall sys_restart_syscall +220 common semtimedop sys_semtimedop +221 common fadvise64 sys_fadvise64 +222 64 timer_create sys_timer_create +223 common timer_settime sys_timer_settime +224 common timer_gettime sys_timer_gettime +225 common timer_getoverrun sys_timer_getoverrun +226 common timer_delete sys_timer_delete +227 common clock_settime sys_clock_settime +228 common clock_gettime sys_clock_gettime +229 common clock_getres sys_clock_getres +230 common clock_nanosleep sys_clock_nanosleep +231 common exit_group sys_exit_group +232 common epoll_wait sys_epoll_wait +233 common epoll_ctl sys_epoll_ctl +234 common tgkill sys_tgkill +235 common utimes sys_utimes +236 64 vserver +237 common mbind sys_mbind +238 common set_mempolicy sys_set_mempolicy +239 common get_mempolicy sys_get_mempolicy +240 common mq_open sys_mq_open +241 common mq_unlink sys_mq_unlink +242 common mq_timedsend sys_mq_timedsend +243 common mq_timedreceive sys_mq_timedreceive +244 64 mq_notify sys_mq_notify +245 common mq_getsetattr sys_mq_getsetattr +246 64 kexec_load sys_kexec_load +247 64 waitid sys_waitid +248 common add_key sys_add_key +249 common request_key sys_request_key +250 common keyctl sys_keyctl +251 common ioprio_set sys_ioprio_set +252 common ioprio_get sys_ioprio_get +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare +273 64 set_robust_list sys_set_robust_list +274 64 get_robust_list sys_get_robust_list +275 common splice sys_splice +276 common tee sys_tee +277 common sync_file_range sys_sync_file_range +278 64 vmsplice sys_vmsplice +279 64 move_pages sys_move_pages +280 common utimensat sys_utimensat +281 common epoll_pwait sys_epoll_pwait +282 common signalfd sys_signalfd +283 common timerfd_create sys_timerfd_create +284 common eventfd sys_eventfd +285 common fallocate sys_fallocate +286 common timerfd_settime sys_timerfd_settime +287 common timerfd_gettime sys_timerfd_gettime +288 common accept4 sys_accept4 +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 +295 64 preadv sys_preadv +296 64 pwritev sys_pwritev +297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +298 common perf_event_open sys_perf_event_open +299 64 recvmmsg sys_recvmmsg +300 common fanotify_init sys_fanotify_init +301 common fanotify_mark sys_fanotify_mark +302 common prlimit64 sys_prlimit64 +303 common name_to_handle_at sys_name_to_handle_at +304 common open_by_handle_at sys_open_by_handle_at +305 common clock_adjtime sys_clock_adjtime +306 common syncfs sys_syncfs +307 64 sendmmsg sys_sendmmsg +308 common setns sys_setns +309 common getcpu sys_getcpu +310 64 process_vm_readv sys_process_vm_readv +311 64 process_vm_writev sys_process_vm_writev +312 common kcmp sys_kcmp +313 common finit_module sys_finit_module +314 common sched_setattr sys_sched_setattr +315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp +318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load +321 common bpf sys_bpf +322 64 execveat sys_execveat +323 common userfaultfd sys_userfaultfd +324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 +326 common copy_file_range sys_copy_file_range +327 64 preadv2 sys_preadv2 +328 64 pwritev2 sys_pwritev2 +329 common pkey_mprotect sys_pkey_mprotect +330 common pkey_alloc sys_pkey_alloc +331 common pkey_free sys_pkey_free +332 common statx sys_statx +333 common io_pgetevents sys_io_pgetevents +334 common rseq sys_rseq +# don't use numbers 387 through 423, add new calls after the last +# 'common' entry +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node + +# +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. +# +512 x32 rt_sigaction compat_sys_rt_sigaction +513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn +514 x32 ioctl compat_sys_ioctl +515 x32 readv sys_readv +516 x32 writev sys_writev +517 x32 recvfrom compat_sys_recvfrom +518 x32 sendmsg compat_sys_sendmsg +519 x32 recvmsg compat_sys_recvmsg +520 x32 execve compat_sys_execve +521 x32 ptrace compat_sys_ptrace +522 x32 rt_sigpending compat_sys_rt_sigpending +523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 +524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo +525 x32 sigaltstack compat_sys_sigaltstack +526 x32 timer_create compat_sys_timer_create +527 x32 mq_notify compat_sys_mq_notify +528 x32 kexec_load compat_sys_kexec_load +529 x32 waitid compat_sys_waitid +530 x32 set_robust_list compat_sys_set_robust_list +531 x32 get_robust_list compat_sys_get_robust_list +532 x32 vmsplice sys_vmsplice +533 x32 move_pages sys_move_pages +534 x32 preadv compat_sys_preadv64 +535 x32 pwritev compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg compat_sys_recvmmsg_time64 +538 x32 sendmmsg compat_sys_sendmmsg +539 x32 process_vm_readv sys_process_vm_readv +540 x32 process_vm_writev sys_process_vm_writev +541 x32 setsockopt sys_setsockopt +542 x32 getsockopt sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit +545 x32 execveat compat_sys_execveat +546 x32 preadv2 compat_sys_preadv64v2 +547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S new file mode 100644 index 000000000..ff6e7003d --- /dev/null +++ b/arch/x86/entry/thunk_32.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) + * Copyright 2008 by Steven Rostedt, Red Hat, Inc + * (inspired by Andi Kleen's thunk_64.S) + */ + #include + #include + #include + + /* put return address in eax (arg1) */ + .macro THUNK name, func, put_ret_addr_in_eax=0 +SYM_CODE_START_NOALIGN(\name) + pushl %eax + pushl %ecx + pushl %edx + + .if \put_ret_addr_in_eax + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + .endif + + call \func + popl %edx + popl %ecx + popl %eax + RET + _ASM_NOKPROBE(\name) +SYM_CODE_END(\name) + .endm + + THUNK preempt_schedule_thunk, preempt_schedule + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace + EXPORT_SYMBOL(preempt_schedule_thunk) + EXPORT_SYMBOL(preempt_schedule_notrace_thunk) + diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S new file mode 100644 index 000000000..f38b07d27 --- /dev/null +++ b/arch/x86/entry/thunk_64.S @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Save registers before calling assembly functions. This avoids + * disturbance of register allocation in some inline assembly constructs. + * Copyright 2001,2002 by Andi Kleen, SuSE Labs. + */ +#include +#include "calling.h" +#include +#include + + /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ + .macro THUNK name, func +SYM_FUNC_START_NOALIGN(\name) + pushq %rbp + movq %rsp, %rbp + + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + + call \func + jmp __thunk_restore +SYM_FUNC_END(\name) + _ASM_NOKPROBE(\name) + .endm + + THUNK preempt_schedule_thunk, preempt_schedule + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace + EXPORT_SYMBOL(preempt_schedule_thunk) + EXPORT_SYMBOL(preempt_schedule_notrace_thunk) + +SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rbp + RET + _ASM_NOKPROBE(__thunk_restore) +SYM_CODE_END(__thunk_restore) diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore new file mode 100644 index 000000000..37a6129d5 --- /dev/null +++ b/arch/x86/entry/vdso/.gitignore @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso.lds +vdsox32.lds +vdso32-syscall-syms.lds +vdso32-sysenter-syms.lds +vdso32-int80-syms.lds +vdso-image-*.c +vdso2c diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile new file mode 100644 index 000000000..3e88b9df8 --- /dev/null +++ b/arch/x86/entry/vdso/Makefile @@ -0,0 +1,220 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Building vDSO images for x86. +# + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + +# Sanitizer runtimes are unavailable and cannot be linked here. +KASAN_SANITIZE := n +KMSAN_SANITIZE_vclock_gettime.o := n +KMSAN_SANITIZE_vgetcpu.o := n + +UBSAN_SANITIZE := n +KCSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y + +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + +VDSO64-$(CONFIG_X86_64) := y +VDSOX32-$(CONFIG_X86_X32_ABI) := y +VDSO32-$(CONFIG_X86_32) := y +VDSO32-$(CONFIG_IA32_EMULATION) := y + +# files to link into the vdso +vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o +vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o +vobjs32-y += vdso32/vclock_gettime.o +vobjs-$(CONFIG_X86_SGX) += vsgx.o + +# files to link into kernel +obj-y += vma.o extable.o +KASAN_SANITIZE_vma.o := y +UBSAN_SANITIZE_vma.o := y +KCSAN_SANITIZE_vma.o := y +OBJECT_FILES_NON_STANDARD_vma.o := n + +# vDSO images to build +vdso_img-$(VDSO64-y) += 64 +vdso_img-$(VDSOX32-y) += x32 +vdso_img-$(VDSO32-y) += 32 + +obj-$(VDSO32-y) += vdso32-setup.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) +vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.lds $(vobjs-y) +targets += vdso32/vdso32.lds $(vobjs32-y) + +# Build the vDSO image C files and link them in. +vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) +vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) +vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) +obj-y += $(vdso_img_objs) +targets += $(vdso_img_cfiles) +targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) + +CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ + -z max-page-size=4096 + +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso_and_check) + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi +hostprogs += vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ + +$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE + $(call if_changed,vdso2c) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ + -fno-omit-frame-pointer -foptimize-sibling-calls \ + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + +$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +# +CFLAGS_REMOVE_vclock_gettime.o = -pg +CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg +CFLAGS_REMOVE_vgetcpu.o = -pg +CFLAGS_REMOVE_vsgx.o = -pg + +# +# X32 processes use x32 vDSO to access 64bit kernel data. +# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ + -z max-page-size=4096 + +# x32-rebranded versions +vobjx32s-y := $(vobjs-y:.o=-x32.o) + +# same thing, but in the output directory +vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) + +# Convert 64bit object file to x32 for x32 vDSO. +quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +targets += vdsox32.lds $(vobjx32s-y) + +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE + $(call if_changed,vdso_and_check) + +CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO +$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 + +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic +KBUILD_CFLAGS_32 += -fno-stack-protector +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + +$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + +$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE + $(call if_changed,vdso_and_check) + +# +# The DSO images are built using a special linker script. +# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(LD) -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -T $(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' + +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack +GCOV_PROFILE := n + +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + +# +# Install the unstripped copies of vdso*.so. If our toolchain supports +# build-id, install .build-id links as well. +# +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ + if readelf -n $< |grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE + @mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) + +clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so* diff --git a/arch/x86/entry/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh new file mode 100755 index 000000000..7ee90a9b5 --- /dev/null +++ b/arch/x86/entry/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +nm="$1" +file="$2" +$nm "$file" | grep '^ *U' > /dev/null 2>&1 +if [ $? -eq 1 ]; then + exit 0 +else + echo "$file: undefined symbols found" >&2 + exit 1 +fi diff --git a/arch/x86/entry/vdso/extable.c b/arch/x86/entry/vdso/extable.c new file mode 100644 index 000000000..afcf5b65b --- /dev/null +++ b/arch/x86/entry/vdso/extable.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +struct vdso_exception_table_entry { + int insn, fixup; +}; + +bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + const struct vdso_image *image = current->mm->context.vdso_image; + const struct vdso_exception_table_entry *extable; + unsigned int nr_entries, i; + unsigned long base; + + /* + * Do not attempt to fixup #DB or #BP. It's impossible to identify + * whether or not a #DB/#BP originated from within an SGX enclave and + * SGX enclaves are currently the only use case for vDSO fixup. + */ + if (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP) + return false; + + if (!current->mm->context.vdso) + return false; + + base = (unsigned long)current->mm->context.vdso + image->extable_base; + nr_entries = image->extable_len / (sizeof(*extable)); + extable = image->extable; + + for (i = 0; i < nr_entries; i++) { + if (regs->ip == base + extable[i].insn) { + regs->ip = base + extable[i].fixup; + regs->di = trapnr; + regs->si = error_code; + regs->dx = fault_addr; + return true; + } + } + + return false; +} diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h new file mode 100644 index 000000000..b56f6b012 --- /dev/null +++ b/arch/x86/entry/vdso/extable.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_EXTABLE_H +#define __VDSO_EXTABLE_H + +/* + * Inject exception fixup for vDSO code. Unlike normal exception fixup, + * vDSO uses a dedicated handler the addresses are relative to the overall + * exception table, not each individual entry. + */ +#ifdef __ASSEMBLY__ +#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ + ASM_VDSO_EXTABLE_HANDLE from to + +.macro ASM_VDSO_EXTABLE_HANDLE from:req to:req + .pushsection __ex_table, "a" + .long (\from) - __ex_table + .long (\to) - __ex_table + .popsection +.endm +#else +#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ + ".pushsection __ex_table, \"a\"\n" \ + ".long (" #from ") - __ex_table\n" \ + ".long (" #to ") - __ex_table\n" \ + ".popsection\n" +#endif + +#endif /* __VDSO_EXTABLE_H */ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c new file mode 100644 index 000000000..7d70935b6 --- /dev/null +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright 2006 Andi Kleen, SUSE Labs. + * Copyright 2019 ARM Limited + * + * 32 Bit compat layer by Stefani Seibold + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#include +#include +#include + +#include "../../../../lib/vdso/gettimeofday.c" + +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t); + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +__kernel_old_time_t __vdso_time(__kernel_old_time_t *t) +{ + return __cvdso_time(t); +} + +__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time"))); + + +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); + +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock, res); +} +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); + +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) +{ + return __cvdso_clock_gettime32(clock, ts); +} + +int clock_gettime(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); + +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); +} + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); +#endif diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S new file mode 100644 index 000000000..bafa73f09 --- /dev/null +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. + */ + +SECTIONS +{ + /* + * User/kernel shared data is before the vDSO. This may be a little + * uglier than putting it after the vDSO, but it avoids issues with + * non-allocatable things that dangle past the end of the PT_LOAD + * segment. + */ + + vvar_start = . - 4 * PAGE_SIZE; + vvar_page = vvar_start; + + /* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; +#include +#undef EMIT_VVAR + + pvclock_page = vvar_start + PAGE_SIZE; + hvclock_page = vvar_start + 2 * PAGE_SIZE; + timens_page = vvar_start + 3 * PAGE_SIZE; + +#undef _ASM_X86_VVAR_H + /* Place all vvars in timens too at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset; +#include +#undef EMIT_VVAR + + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } :text + + /* + * Discard .note.gnu.property sections which are unused and have + * different alignment requirement from vDSO note sections. + */ + /DISCARD/ : { + *(.note.gnu.property) + } + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + + /* + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. + */ + + .text : { + *(.text*) + } :text =0x90909090, + + + + .altinstructions : { *(.altinstructions) } :text + .altinstr_replacement : { *(.altinstr_replacement) } :text + + __ex_table : { *(__ex_table) } :text + + /DISCARD/ : { + *(.discard) + *(.discard.*) + *(__bug_table) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S new file mode 100644 index 000000000..794231701 --- /dev/null +++ b/arch/x86/entry/vdso/vdso-note.S @@ -0,0 +1,15 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END + +BUILD_SALT diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S new file mode 100644 index 000000000..e8c60ae7a --- /dev/null +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSO64 + +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + clock_getres; + __vdso_clock_getres; +#ifdef CONFIG_X86_SGX + __vdso_sgx_enter_enclave; +#endif + local: *; + }; +} diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c new file mode 100644 index 000000000..90d15f2a7 --- /dev/null +++ b/arch/x86/entry/vdso/vdso2c.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vdso2c - A vdso image preparation tool + * Copyright (c) 2014 Andy Lutomirski and others + * + * vdso2c requires stripped and unstripped input. It would be trivial + * to fully strip the input in here, but, for reasons described below, + * we need to write a section table. Doing this is more or less + * equivalent to dropping all non-allocatable sections, but it's + * easier to let objcopy handle that instead of doing it ourselves. + * If we ever need to do something fancier than what objcopy provides, + * it would be straightforward to add here. + * + * We're keep a section table for a few reasons: + * + * The Go runtime had a couple of bugs: it would read the section + * table to try to figure out how many dynamic symbols there were (it + * shouldn't have looked at the section table at all) and, if there + * were no SHT_SYNDYM section table entry, it would use an + * uninitialized value for the number of symbols. An empty DYNSYM + * table would work, but I see no reason not to write a valid one (and + * keep full performance for old Go programs). This hack is only + * needed on x86_64. + * + * The bug was introduced on 2012-08-31 by: + * https://code.google.com/p/go/source/detail?r=56ea40aac72b + * and was fixed on 2014-06-13 by: + * https://code.google.com/p/go/source/detail?r=fc1cd5e12595 + * + * Binutils has issues debugging the vDSO: it reads the section table to + * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which + * would break build-id if we removed the section table. Binutils + * also requires that shstrndx != 0. See: + * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 + * + * elfutils might not look for PT_NOTE if there is a section table at + * all. I don't know whether this matters for any practical purpose. + * + * For simplicity, rather than hacking up a partial section table, we + * just write a mostly complete one. We omit non-dynamic symbols, + * though, since they're rather large. + * + * Once binutils gets fixed, we might be able to drop this for all but + * the 64-bit vdso, since build-id only works in kernel RPMs, and + * systems that update to new enough kernel RPMs will likely update + * binutils in sync. build-id has never worked for home-built kernel + * RPMs without manual symlinking, and I suspect that no one ever does + * that. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +const char *outfilename; + +/* Symbols that we need in vdso2c. */ +enum { + sym_vvar_start, + sym_vvar_page, + sym_pvclock_page, + sym_hvclock_page, + sym_timens_page, +}; + +const int special_pages[] = { + sym_vvar_page, + sym_pvclock_page, + sym_hvclock_page, + sym_timens_page, +}; + +struct vdso_sym { + const char *name; + bool export; +}; + +struct vdso_sym required_syms[] = { + [sym_vvar_start] = {"vvar_start", true}, + [sym_vvar_page] = {"vvar_page", true}, + [sym_pvclock_page] = {"pvclock_page", true}, + [sym_hvclock_page] = {"hvclock_page", true}, + [sym_timens_page] = {"timens_page", true}, + {"VDSO32_NOTE_MASK", true}, + {"__kernel_vsyscall", true}, + {"__kernel_sigreturn", true}, + {"__kernel_rt_sigreturn", true}, + {"int80_landing_pad", true}, + {"vdso32_rt_sigreturn_landing_pad", true}, + {"vdso32_sigreturn_landing_pad", true}, +}; + +__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) +static void fail(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + fprintf(stderr, "Error: "); + vfprintf(stderr, format, ap); + if (outfilename) + unlink(outfilename); + exit(1); + va_end(ap); +} + +/* + * Evil macros for little-endian reads and writes + */ +#define GLE(x, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) + +extern void bad_get_le(void); +#define LAST_GLE(x) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) + +#define GET_LE(x) \ + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) + +#define PLE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_le##bits((val), (x)), ifnot) + +extern void bad_put_le(void); +#define LAST_PLE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) + +#define PUT_LE(x, val) \ + PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) + + +#define NSYMS ARRAY_SIZE(required_syms) + +#define BITSFUNC3(name, bits, suffix) name##bits##suffix +#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) +#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, ) + +#define INT_BITS BITSFUNC2(int, ELF_BITS, _t) + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +#define ELF_BITS 64 +#include "vdso2c.h" +#undef ELF_BITS + +#define ELF_BITS 32 +#include "vdso2c.h" +#undef ELF_BITS + +static void go(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *name) +{ + Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr; + + if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { + go64(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { + go32(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else { + fail("unknown ELF class\n"); + } +} + +static void map_input(const char *name, void **addr, size_t *len, int prot) +{ + off_t tmp_len; + + int fd = open(name, O_RDONLY); + if (fd == -1) + err(1, "open(%s)", name); + + tmp_len = lseek(fd, 0, SEEK_END); + if (tmp_len == (off_t)-1) + err(1, "lseek"); + *len = (size_t)tmp_len; + + *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0); + if (*addr == MAP_FAILED) + err(1, "mmap"); + + close(fd); +} + +int main(int argc, char **argv) +{ + size_t raw_len, stripped_len; + void *raw_addr, *stripped_addr; + FILE *outfile; + char *name, *tmp; + int namelen; + + if (argc != 4) { + printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n"); + return 1; + } + + /* + * Figure out the struct name. If we're writing to a .so file, + * generate raw output instead. + */ + name = strdup(argv[3]); + namelen = strlen(name); + if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { + name = NULL; + } else { + tmp = strrchr(name, '/'); + if (tmp) + name = tmp + 1; + tmp = strchr(name, '.'); + if (tmp) + *tmp = '\0'; + for (tmp = name; *tmp; tmp++) + if (*tmp == '-') + *tmp = '_'; + } + + map_input(argv[1], &raw_addr, &raw_len, PROT_READ); + map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ); + + outfilename = argv[3]; + outfile = fopen(outfilename, "w"); + if (!outfile) + err(1, "fopen(%s)", outfilename); + + go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name); + + munmap(raw_addr, raw_len); + munmap(stripped_addr, stripped_len); + fclose(outfile); + + return 0; +} diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h new file mode 100644 index 000000000..5264daa88 --- /dev/null +++ b/arch/x86/entry/vdso/vdso2c.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is included twice from vdso2c.c. It generates code for 32-bit + * and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs + * are built for 32-bit userspace. + */ + +static void BITSFUNC(copy)(FILE *outfile, const unsigned char *data, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (i % 10 == 0) + fprintf(outfile, "\n\t"); + fprintf(outfile, "0x%02X, ", (int)(data)[i]); + } +} + + +/* + * Extract a section from the input data into a standalone blob. Used to + * capture kernel-only data that needs to persist indefinitely, e.g. the + * exception fixup tables, but only in the kernel, i.e. the section can + * be stripped from the final vDSO image. + */ +static void BITSFUNC(extract)(const unsigned char *data, size_t data_len, + FILE *outfile, ELF(Shdr) *sec, const char *name) +{ + unsigned long offset; + size_t len; + + offset = (unsigned long)GET_LE(&sec->sh_offset); + len = (size_t)GET_LE(&sec->sh_size); + + if (offset + len > data_len) + fail("section to extract overruns input data"); + + fprintf(outfile, "static const unsigned char %s[%zu] = {", name, len); + BITSFUNC(copy)(outfile, data + offset, len); + fprintf(outfile, "\n};\n\n"); +} + +static void BITSFUNC(go)(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *image_name) +{ + int found_load = 0; + unsigned long load_size = -1; /* Work around bogus warning */ + unsigned long mapping_size; + ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; + unsigned long i, syms_nr; + ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, + *alt_sec = NULL, *extable_sec = NULL; + ELF(Dyn) *dyn = 0, *dyn_end = 0; + const char *secstrings; + INT_BITS syms[NSYMS] = {}; + + ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); + + if (GET_LE(&hdr->e_type) != ET_DYN) + fail("input is not a shared object\n"); + + /* Walk the segment table. */ + for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { + if (GET_LE(&pt[i].p_type) == PT_LOAD) { + if (found_load) + fail("multiple PT_LOAD segs\n"); + + if (GET_LE(&pt[i].p_offset) != 0 || + GET_LE(&pt[i].p_vaddr) != 0) + fail("PT_LOAD in wrong place\n"); + + if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz)) + fail("cannot handle memsz != filesz\n"); + + load_size = GET_LE(&pt[i].p_memsz); + found_load = 1; + } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) { + dyn = raw_addr + GET_LE(&pt[i].p_offset); + dyn_end = raw_addr + GET_LE(&pt[i].p_offset) + + GET_LE(&pt[i].p_memsz); + } + } + if (!found_load) + fail("no PT_LOAD seg\n"); + + if (stripped_len < load_size) + fail("stripped input is too short\n"); + + if (!dyn) + fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n"); + + /* Walk the dynamic table */ + for (i = 0; dyn + i < dyn_end && + GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { + typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag); + if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA || + tag == DT_RELENT || tag == DT_TEXTREL) + fail("vdso image contains dynamic relocations\n"); + } + + /* Walk the section table */ + secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) + + GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); + secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset); + for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { + ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) + + GET_LE(&hdr->e_shentsize) * i; + if (GET_LE(&sh->sh_type) == SHT_SYMTAB) + symtab_hdr = sh; + + if (!strcmp(secstrings + GET_LE(&sh->sh_name), + ".altinstructions")) + alt_sec = sh; + if (!strcmp(secstrings + GET_LE(&sh->sh_name), "__ex_table")) + extable_sec = sh; + } + + if (!symtab_hdr) + fail("no symbol table\n"); + + strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) + + GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link); + + syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); + /* Walk the symbol table */ + for (i = 0; i < syms_nr; i++) { + unsigned int k; + ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) + + GET_LE(&symtab_hdr->sh_entsize) * i; + const char *sym_name = raw_addr + + GET_LE(&strtab_hdr->sh_offset) + + GET_LE(&sym->st_name); + + for (k = 0; k < NSYMS; k++) { + if (!strcmp(sym_name, required_syms[k].name)) { + if (syms[k]) { + fail("duplicate symbol %s\n", + required_syms[k].name); + } + + /* + * Careful: we use negative addresses, but + * st_value is unsigned, so we rely + * on syms[k] being a signed type of the + * correct width. + */ + syms[k] = GET_LE(&sym->st_value); + } + } + } + + /* Validate mapping addresses. */ + for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { + INT_BITS symval = syms[special_pages[i]]; + + if (!symval) + continue; /* The mapping isn't used; ignore it. */ + + if (symval % 4096) + fail("%s must be a multiple of 4096\n", + required_syms[i].name); + if (symval + 4096 < syms[sym_vvar_start]) + fail("%s underruns vvar_start\n", + required_syms[i].name); + if (symval + 4096 > 0) + fail("%s is on the wrong side of the vdso text\n", + required_syms[i].name); + } + if (syms[sym_vvar_start] % 4096) + fail("vvar_begin must be a multiple of 4096\n"); + + if (!image_name) { + fwrite(stripped_addr, stripped_len, 1, outfile); + return; + } + + mapping_size = (stripped_len + 4095) / 4096 * 4096; + + fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); + fprintf(outfile, "#include \n"); + fprintf(outfile, "#include \n"); + fprintf(outfile, "#include \n"); + fprintf(outfile, "\n"); + fprintf(outfile, + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", + mapping_size); + for (i = 0; i < stripped_len; i++) { + if (i % 10 == 0) + fprintf(outfile, "\n\t"); + fprintf(outfile, "0x%02X, ", + (int)((unsigned char *)stripped_addr)[i]); + } + fprintf(outfile, "\n};\n\n"); + if (extable_sec) + BITSFUNC(extract)(raw_addr, raw_len, outfile, + extable_sec, "extable"); + + fprintf(outfile, "const struct vdso_image %s = {\n", image_name); + fprintf(outfile, "\t.data = raw_data,\n"); + fprintf(outfile, "\t.size = %lu,\n", mapping_size); + if (alt_sec) { + fprintf(outfile, "\t.alt = %lu,\n", + (unsigned long)GET_LE(&alt_sec->sh_offset)); + fprintf(outfile, "\t.alt_len = %lu,\n", + (unsigned long)GET_LE(&alt_sec->sh_size)); + } + if (extable_sec) { + fprintf(outfile, "\t.extable_base = %lu,\n", + (unsigned long)GET_LE(&extable_sec->sh_offset)); + fprintf(outfile, "\t.extable_len = %lu,\n", + (unsigned long)GET_LE(&extable_sec->sh_size)); + fprintf(outfile, "\t.extable = extable,\n"); + } + + for (i = 0; i < NSYMS; i++) { + if (required_syms[i].export && syms[i]) + fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", + required_syms[i].name, (int64_t)syms[i]); + } + fprintf(outfile, "};\n"); +} diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c new file mode 100644 index 000000000..43842fade --- /dev/null +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) Copyright 2002 Linus Torvalds + * Portions based on the vdso-randomization code from exec-shield: + * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar + * + * This file contains the needed initializations to support sysenter. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_DEFAULT 0 +#else +#define VDSO_DEFAULT 1 +#endif + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT; + +static int __init vdso32_setup(char *s) +{ + vdso32_enabled = simple_strtoul(s, NULL, 0); + + if (vdso32_enabled > 1) { + pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n"); + vdso32_enabled = 0; + } + + return 1; +} + +/* + * For consistency, the argument vdso32=[012] affects the 32-bit vDSO + * behavior on both 64-bit and 32-bit kernels. + * On 32-bit kernels, vdso=[012] means the same thing. + */ +__setup("vdso32=", vdso32_setup); + +#ifdef CONFIG_X86_32 +__setup_param("vdso=", vdso_setup, vdso32_setup, 0); +#endif + +int __init sysenter_setup(void) +{ + init_vdso_image(&vdso_image_32); + + return 0; +} + +#ifdef CONFIG_X86_64 + +subsys_initcall(sysenter_setup); + +#ifdef CONFIG_SYSCTL +/* Register vsyscall32 into the ABI table */ +#include + +static struct ctl_table abi_table2[] = { + { + .procname = "vsyscall32", + .data = &vdso32_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static struct ctl_table abi_root_table2[] = { + { + .procname = "abi", + .mode = 0555, + .child = abi_table2 + }, + {} +}; + +static __init int ia32_binfmt_init(void) +{ + register_sysctl_table(abi_root_table2); + return 0; +} +__initcall(ia32_binfmt_init); +#endif /* CONFIG_SYSCTL */ + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/entry/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore new file mode 100644 index 000000000..516738484 --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso32.lds diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S new file mode 100644 index 000000000..2cbd39939 --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include + +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. */ +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END + +BUILD_SALT diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S new file mode 100644 index 000000000..1bd068f72 --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +#ifndef SYSCALL_ENTER_KERNEL +#define SYSCALL_ENTER_KERNEL int $0x80 +#endif + + .text + .globl __kernel_sigreturn + .type __kernel_sigreturn,@function + nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ + ALIGN +__kernel_sigreturn: +.LSTART_sigreturn: + popl %eax /* XXX does this mean it needs unwind info? */ + movl $__NR_sigreturn, %eax + SYSCALL_ENTER_KERNEL +.LEND_sigreturn: +SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) + nop + .size __kernel_sigreturn,.-.LSTART_sigreturn + + .globl __kernel_rt_sigreturn + .type __kernel_rt_sigreturn,@function + ALIGN +__kernel_rt_sigreturn: +.LSTART_rt_sigreturn: + movl $__NR_rt_sigreturn, %eax + SYSCALL_ENTER_KERNEL +.LEND_rt_sigreturn: +SYM_INNER_LABEL(vdso32_rt_sigreturn_landing_pad, SYM_L_GLOBAL) + nop + .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + .previous + + .section .eh_frame,"a",@progbits +.LSTARTFRAMEDLSI1: + .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 +.LSTARTCIEDLSI1: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0 /* DW_CFA_nop */ + .align 4 +.LENDCIEDLSI1: + .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ +.LSTARTFDEDLSI1: + .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ + /* HACK: The dwarf2 unwind routines will subtract 1 from the + return address to get an address in the middle of the + presumed call instruction. Since we didn't get here via + a call, we need to include the nop before the real start + to make up for it. */ + .long .LSTART_sigreturn-1-. /* PC-relative start address */ + .long .LEND_sigreturn-.LSTART_sigreturn+1 + .uleb128 0 /* Augmentation */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + complicated by the fact that the "CFA" is always assumed to + be the value of the stack pointer in the caller. This means + that we must define the CFA of this body of code to be the + saved value of the stack pointer in the sigcontext. Which + also means that there is no fixed relation to the other + saved registers, which means that we must use DW_CFA_expression + to compute their addresses. It also means that when we + adjust the stack with the popl, we have to do it all over again. */ + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +1: + +#define do_expr(regno, offset) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ +1: + + do_cfa_expr(IA32_SIGCONTEXT_sp+4) + do_expr(0, IA32_SIGCONTEXT_ax+4) + do_expr(1, IA32_SIGCONTEXT_cx+4) + do_expr(2, IA32_SIGCONTEXT_dx+4) + do_expr(3, IA32_SIGCONTEXT_bx+4) + do_expr(5, IA32_SIGCONTEXT_bp+4) + do_expr(6, IA32_SIGCONTEXT_si+4) + do_expr(7, IA32_SIGCONTEXT_di+4) + do_expr(8, IA32_SIGCONTEXT_ip+4) + + .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ + + do_cfa_expr(IA32_SIGCONTEXT_sp) + do_expr(0, IA32_SIGCONTEXT_ax) + do_expr(1, IA32_SIGCONTEXT_cx) + do_expr(2, IA32_SIGCONTEXT_dx) + do_expr(3, IA32_SIGCONTEXT_bx) + do_expr(5, IA32_SIGCONTEXT_bp) + do_expr(6, IA32_SIGCONTEXT_si) + do_expr(7, IA32_SIGCONTEXT_di) + do_expr(8, IA32_SIGCONTEXT_ip) + + .align 4 +.LENDFDEDLSI1: + + .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ +.LSTARTFDEDLSI2: + .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ + /* HACK: See above wrt unwind library assumptions. */ + .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ + .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 + .uleb128 0 /* Augmentation */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + slightly less complicated than the above, since we don't + modify the stack pointer in the process. */ + + do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp) + do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax) + do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx) + do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx) + do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx) + do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp) + do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si) + do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di) + do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip) + + .align 4 +.LENDFDEDLSI2: + .previous diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S new file mode 100644 index 000000000..d33c6513f --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AT_SYSINFO entry point +*/ + +#include +#include +#include +#include + + .text + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function + ALIGN +__kernel_vsyscall: + CFI_STARTPROC + /* + * Reshuffle regs so that all of any of the entry instructions + * will preserve enough state. + * + * A really nice entry sequence would be: + * pushl %edx + * pushl %ecx + * movl %esp, %ecx + * + * Unfortunately, naughty Android versions between July and December + * 2015 actually hardcode the traditional Linux SYSENTER entry + * sequence. That is severely broken for a number of reasons (ask + * anyone with an AMD CPU, for example). Nonetheless, we try to keep + * it working approximately as well as it ever worked. + * + * This link may elucidate some of the history: + * https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7 + * personally, I find it hard to understand what's going on there. + * + * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE. + * Execute an indirect call to the address in the AT_SYSINFO auxv + * entry. That is the ONLY correct way to make a fast 32-bit system + * call on Linux. (Open-coding int $0x80 is also fine, but it's + * slow.) + */ + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + + #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" + #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" + +#ifdef CONFIG_X86_64 + /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ + ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ + SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 +#else + ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP +#endif + + /* Enter using int $0x80 */ + int $0x80 +SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) + + /* + * Restore EDX and ECX in case they were clobbered. EBP is not + * clobbered (the kernel restores it), but it's cleaner and + * probably faster to pop it than to adjust ESP using addl. + */ + popl %ebp + CFI_RESTORE ebp + CFI_ADJUST_CFA_OFFSET -4 + popl %edx + CFI_RESTORE edx + CFI_ADJUST_CFA_OFFSET -4 + popl %ecx + CFI_RESTORE ecx + CFI_ADJUST_CFA_OFFSET -4 + RET + CFI_ENDPROC + + .size __kernel_vsyscall,.-__kernel_vsyscall + .previous diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000..283ed9d00 --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BUILD_VDSO32 + +#ifdef CONFIG_X86_64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_X86_64 +#undef CONFIG_COMPAT +#undef CONFIG_PGTABLE_LEVELS +#undef CONFIG_ILLEGAL_POINTER_VALUE +#undef CONFIG_SPARSEMEM_VMEMMAP +#undef CONFIG_NR_CPUS +#undef CONFIG_PARAVIRT_XXL + +#define CONFIG_X86_32 1 +#define CONFIG_PGTABLE_LEVELS 2 +#define CONFIG_PAGE_OFFSET 0 +#define CONFIG_ILLEGAL_POINTER_VALUE 0 +#define CONFIG_NR_CPUS 1 + +#define BUILD_VDSO32_64 + +#endif + +#include "../vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S new file mode 100644 index 000000000..c7720995a --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linker script for 32-bit vDSO. + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#include + +#define BUILD_VDSO32 + +#include "../vdso-layout.lds.S" + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_vsyscall); + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; + }; + + LINUX_2.5 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + local: *; + }; +} diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S new file mode 100644 index 000000000..16a8050a4 --- /dev/null +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linker script for x32 vDSO. + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSOX32 + +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_getcpu; + __vdso_time; + __vdso_clock_getres; + local: *; + }; +} diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c new file mode 100644 index 000000000..b88a82bbc --- /dev/null +++ b/arch/x86/entry/vdso/vgetcpu.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * + * Fast user context implementation of getcpu() + */ + +#include +#include +#include +#include + +notrace long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + vdso_read_cpunode(cpu, node); + + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c new file mode 100644 index 000000000..128866139 --- /dev/null +++ b/arch/x86/entry/vdso/vma.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2007 Andi Kleen, SUSE Labs. + * + * This contains most of the x86 vDSO kernel-side code. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef _ASM_X86_VVAR_H +#define EMIT_VVAR(name, offset) \ + const size_t name ## _offset = offset; +#include + +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page + _vdso_data_offset); +} +#undef EMIT_VVAR + +unsigned int vclocks_used __read_mostly; + +#if defined(CONFIG_X86_64) +unsigned int __read_mostly vdso64_enabled = 1; +#endif + +void __init init_vdso_image(const struct vdso_image *image) +{ + BUG_ON(image->size % PAGE_SIZE != 0); + + apply_alternatives((struct alt_instr *)(image->data + image->alt), + (struct alt_instr *)(image->data + image->alt + + image->alt_len)); +} + +static const struct vm_special_mapping vvar_mapping; +struct linux_binprm; + +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + + if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT)); + get_page(vmf->page); + return 0; +} + +static void vdso_fix_landing(const struct vdso_image *image, + struct vm_area_struct *new_vma) +{ +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + if (in_ia32_syscall() && image == &vdso_image_32) { + struct pt_regs *regs = current_pt_regs(); + unsigned long vdso_land = image->sym_int80_landing_pad; + unsigned long old_land_addr = vdso_land + + (unsigned long)current->mm->context.vdso; + + /* Fixing userspace landing - look at do_fast_syscall_32 */ + if (regs->ip == old_land_addr) + regs->ip = new_vma->vm_start + vdso_land; + } +#endif +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + const struct vdso_image *image = current->mm->context.vdso_image; + + vdso_fix_landing(image, new_vma); + current->mm->context.vdso = (void __user *)new_vma->vm_start; + + return 0; +} + +#ifdef CONFIG_TIME_NS +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} + +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_mapping)) + zap_page_range(vma, vma->vm_start, size); + } + mmap_read_unlock(mm); + + return 0; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + unsigned long pfn; + long sym_offset; + + if (!image) + return VM_FAULT_SIGBUS; + + sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + + image->sym_vvar_start; + + /* + * Sanity check: a symbol offset of zero means that the page + * does not exist for this vdso image, not that the page is at + * offset zero relative to the text mapping. This should be + * impossible here, because sym_offset should only be zero for + * the page past the end of the vvar mapping. + */ + if (sym_offset == 0) + return VM_FAULT_SIGBUS; + + if (sym_offset == image->sym_vvar_page) { + struct page *timens_page = find_timens_vvar_page(vma); + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the sym_vvar_page offset and + * the real VVAR page is mapped with the sym_timens_page + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (timens_page) { + unsigned long addr; + vm_fault_t err; + + /* + * Optimization: inside time namespace pre-fault + * VVAR page too. As on timens page there are only + * offsets for clocks on VVAR, it'll be faulted + * shortly by VDSO code. + */ + addr = vmf->address + (image->sym_timens_page - sym_offset); + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + + pfn = page_to_pfn(timens_page); + } + + return vmf_insert_pfn(vma, vmf->address, pfn); + } else if (sym_offset == image->sym_pvclock_page) { + struct pvclock_vsyscall_time_info *pvti = + pvclock_get_pvti_cpu0_va(); + if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) { + return vmf_insert_pfn_prot(vma, vmf->address, + __pa(pvti) >> PAGE_SHIFT, + pgprot_decrypted(vma->vm_page_prot)); + } + } else if (sym_offset == image->sym_hvclock_page) { + struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); + + if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) + return vmf_insert_pfn(vma, vmf->address, + virt_to_phys(tsc_pg) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_timens_page) { + struct page *timens_page = find_timens_vvar_page(vma); + + if (!timens_page) + return VM_FAULT_SIGBUS; + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + return vmf_insert_pfn(vma, vmf->address, pfn); + } + + return VM_FAULT_SIGBUS; +} + +static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, +}; +static const struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +/* + * Add vdso and vvar mappings to current process. + * @image - blob to map + * @addr - request a specific address (zero to map at free addr) + */ +static int map_vdso(const struct vdso_image *image, unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long text_start; + int ret = 0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + addr = get_unmapped_area(NULL, addr, + image->size - image->sym_vvar_start, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + text_start = addr - image->sym_vvar_start; + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + */ + vma = _install_special_mapping(mm, + text_start, + image->size, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto up_fail; + } + + vma = _install_special_mapping(mm, + addr, + -image->sym_vvar_start, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, + &vvar_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + do_munmap(mm, text_start, image->size, NULL); + } else { + current->mm->context.vdso = (void __user *)text_start; + current->mm->context.vdso_image = image; + } + +up_fail: + mmap_write_unlock(mm); + return ret; +} + +#ifdef CONFIG_X86_64 +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ +static unsigned long vdso_addr(unsigned long start, unsigned len) +{ + unsigned long addr, end; + unsigned offset; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; + if (end >= DEFAULT_MAP_WINDOW) + end = DEFAULT_MAP_WINDOW; + end -= len; + + if (end > start) { + offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } + + /* + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. + */ + addr = align_vdso_addr(addr); + + return addr; +} + +static int map_vdso_randomized(const struct vdso_image *image) +{ + unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); + + return map_vdso(image, addr); +} +#endif + +int map_vdso_once(const struct vdso_image *image, unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + mmap_write_lock(mm); + /* + * Check if we have already mapped vdso blob - fail to prevent + * abusing from userspace install_special_mapping, which may + * not do accounting and rlimit right. + * We could search vma near context.vdso, but it's a slowpath, + * so let's explicitly check all VMAs to be completely sure. + */ + for_each_vma(vmi, vma) { + if (vma_is_special_mapping(vma, &vdso_mapping) || + vma_is_special_mapping(vma, &vvar_mapping)) { + mmap_write_unlock(mm); + return -EEXIST; + } + } + mmap_write_unlock(mm); + + return map_vdso(image, addr); +} + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +static int load_vdso32(void) +{ + if (vdso32_enabled != 1) /* Other values all mean "disabled" */ + return 0; + + return map_vdso(&vdso_image_32, 0); +} +#endif + +#ifdef CONFIG_X86_64 +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + if (!vdso64_enabled) + return 0; + + return map_vdso_randomized(&vdso_image_64); +} + +#ifdef CONFIG_COMPAT +int compat_arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp, bool x32) +{ +#ifdef CONFIG_X86_X32_ABI + if (x32) { + if (!vdso64_enabled) + return 0; + return map_vdso_randomized(&vdso_image_x32); + } +#endif +#ifdef CONFIG_IA32_EMULATION + return load_vdso32(); +#else + return 0; +#endif +} +#endif +#else +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + return load_vdso32(); +} +#endif + +bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + const struct vdso_image *image = current->mm->context.vdso_image; + unsigned long vdso = (unsigned long) current->mm->context.vdso; + + if (in_ia32_syscall() && image == &vdso_image_32) { + if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad || + regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad) + return true; + } +#endif + return false; +} + +#ifdef CONFIG_X86_64 +static __init int vdso_setup(char *s) +{ + vdso64_enabled = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("vdso=", vdso_setup); + +static int __init init_vdso(void) +{ + BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32); + + init_vdso_image(&vdso_image_64); + +#ifdef CONFIG_X86_X32_ABI + init_vdso_image(&vdso_image_x32); +#endif + + return 0; +} +subsys_initcall(init_vdso); +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S new file mode 100644 index 000000000..d77d278ee --- /dev/null +++ b/arch/x86/entry/vdso/vsgx.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include + +#include "extable.h" + +/* Relative to %rbp. */ +#define SGX_ENCLAVE_OFFSET_OF_RUN 16 + +/* The offsets relative to struct sgx_enclave_run. */ +#define SGX_ENCLAVE_RUN_TCS 0 +#define SGX_ENCLAVE_RUN_LEAF 8 +#define SGX_ENCLAVE_RUN_EXCEPTION_VECTOR 12 +#define SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE 14 +#define SGX_ENCLAVE_RUN_EXCEPTION_ADDR 16 +#define SGX_ENCLAVE_RUN_USER_HANDLER 24 +#define SGX_ENCLAVE_RUN_USER_DATA 32 /* not used */ +#define SGX_ENCLAVE_RUN_RESERVED_START 40 +#define SGX_ENCLAVE_RUN_RESERVED_END 256 + +.code64 +.section .text, "ax" + +SYM_FUNC_START(__vdso_sgx_enter_enclave) + /* Prolog */ + .cfi_startproc + push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbp, 0 + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + push %rbx + .cfi_rel_offset %rbx, -8 + + mov %ecx, %eax +.Lenter_enclave: + /* EENTER <= function <= ERESUME */ + cmp $EENTER, %eax + jb .Linvalid_input + cmp $ERESUME, %eax + ja .Linvalid_input + + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rcx + + /* Validate that the reserved area contains only zeros. */ + mov $SGX_ENCLAVE_RUN_RESERVED_START, %rbx +1: + cmpq $0, (%rcx, %rbx) + jne .Linvalid_input + add $8, %rbx + cmpq $SGX_ENCLAVE_RUN_RESERVED_END, %rbx + jne 1b + + /* Load TCS and AEP */ + mov SGX_ENCLAVE_RUN_TCS(%rcx), %rbx + lea .Lasync_exit_pointer(%rip), %rcx + + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ +.Lasync_exit_pointer: +.Lenclu_eenter_eresume: + enclu + + /* EEXIT jumps here unless the enclave is doing something fancy. */ + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx + + /* Set exit_reason. */ + movl $EEXIT, SGX_ENCLAVE_RUN_LEAF(%rbx) + + /* Invoke userspace's exit handler if one was provided. */ +.Lhandle_exit: + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) + jne .Linvoke_userspace_handler + + /* Success, in the sense that ENCLU was attempted. */ + xor %eax, %eax + +.Lout: + pop %rbx + leave + .cfi_def_cfa %rsp, 8 + RET + + /* The out-of-line code runs with the pre-leave stack frame. */ + .cfi_def_cfa %rbp, 16 + +.Linvalid_input: + mov $(-EINVAL), %eax + jmp .Lout + +.Lhandle_exception: + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx + + /* Set the exception info. */ + mov %eax, (SGX_ENCLAVE_RUN_LEAF)(%rbx) + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION_VECTOR)(%rbx) + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE)(%rbx) + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION_ADDR)(%rbx) + jmp .Lhandle_exit + +.Linvoke_userspace_handler: + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ + mov %rsp, %rcx + + /* Save struct sgx_enclave_exception %rbx is about to be clobbered. */ + mov %rbx, %rax + + /* Save the untrusted RSP offset in %rbx (non-volatile register). */ + mov %rsp, %rbx + and $0xf, %rbx + + /* + * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned + * _after_ pushing the parameters on the stack, hence the bonus push. + */ + and $-0x10, %rsp + push %rax + + /* Push struct sgx_enclave_exception as a param to the callback. */ + push %rax + + /* Clear RFLAGS.DF per x86_64 ABI */ + cld + + /* + * Load the callback pointer to %rax and lfence for LVI (load value + * injection) protection before making the call. + */ + mov SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax + lfence + call *%rax + + /* Undo the post-exit %rsp adjustment. */ + lea 0x10(%rsp, %rbx), %rsp + + /* + * If the return from callback is zero or negative, return immediately, + * else re-execute ENCLU with the positive return value interpreted as + * the requested ENCLU function. + */ + cmp $0, %eax + jle .Lout + jmp .Lenter_enclave + + .cfi_endproc + +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception) + +SYM_FUNC_END(__vdso_sgx_enter_enclave) diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile new file mode 100644 index 000000000..93c1b3e94 --- /dev/null +++ b/arch/x86/entry/vsyscall/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the x86 low level vsyscall code +# +obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o + diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c new file mode 100644 index 000000000..4af81df13 --- /dev/null +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2012-2014 Andy Lutomirski + * + * Based on the original implementation which is: + * Copyright (C) 2001 Andrea Arcangeli SuSE + * Copyright 2003 Andi Kleen, SuSE Labs. + * + * Parts of the original code have been moved to arch/x86/vdso/vma.c + * + * This file implements vsyscall emulation. vsyscalls are a legacy ABI: + * Userspace can request certain kernel services by calling fixed + * addresses. This concept is problematic: + * + * - It interferes with ASLR. + * - It's awkward to write code that lives in kernel addresses but is + * callable by userspace at fixed addresses. + * - The whole concept is impossible for 32-bit compat userspace. + * - UML cannot easily virtualize a vsyscall. + * + * As of mid-2014, I believe that there is no new userspace code that + * will use a vsyscall if the vDSO is present. I hope that there will + * soon be no new userspace code that will ever use a vsyscall. + * + * The code in this file emulates vsyscalls when notified of a page + * fault to a vsyscall address. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include "vsyscall_trace.h" + +static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE + NONE; +#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) + XONLY; +#else + #error VSYSCALL config is broken +#endif + +static int __init vsyscall_setup(char *str) +{ + if (str) { + if (!strcmp("emulate", str)) + vsyscall_mode = EMULATE; + else if (!strcmp("xonly", str)) + vsyscall_mode = XONLY; + else if (!strcmp("none", str)) + vsyscall_mode = NONE; + else + return -EINVAL; + + return 0; + } + + return -EINVAL; +} +early_param("vsyscall", vsyscall_setup); + +static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, + const char *message) +{ + if (!show_unhandled_signals) + return; + + printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); +} + +static int addr_to_vsyscall_nr(unsigned long addr) +{ + int nr; + + if ((addr & ~0xC00UL) != VSYSCALL_ADDR) + return -EINVAL; + + nr = (addr & 0xC00UL) >> 10; + if (nr >= 3) + return -EINVAL; + + return nr; +} + +static bool write_ok_or_segv(unsigned long ptr, size_t size) +{ + /* + * XXX: if access_ok, get_user, and put_user handled + * sig_on_uaccess_err, this could go away. + */ + + if (!access_ok((void __user *)ptr, size)) { + struct thread_struct *thread = ¤t->thread; + + thread->error_code = X86_PF_USER | X86_PF_WRITE; + thread->cr2 = ptr; + thread->trap_nr = X86_TRAP_PF; + + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); + return false; + } else { + return true; + } +} + +bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) +{ + struct task_struct *tsk; + unsigned long caller; + int vsyscall_nr, syscall_nr, tmp; + int prev_sig_on_uaccess_err; + long ret; + unsigned long orig_dx; + + /* Write faults or kernel-privilege faults never get fixed up. */ + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) + return false; + + if (!(error_code & X86_PF_INSTR)) { + /* Failed vsyscall read */ + if (vsyscall_mode == EMULATE) + return false; + + /* + * User code tried and failed to read the vsyscall page. + */ + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); + return false; + } + + /* + * No point in checking CS -- the only way to get here is a user mode + * trap to a high address, which means that we're in 64-bit user code. + */ + + WARN_ON_ONCE(address != regs->ip); + + if (vsyscall_mode == NONE) { + warn_bad_vsyscall(KERN_INFO, regs, + "vsyscall attempted with vsyscall=none"); + return false; + } + + vsyscall_nr = addr_to_vsyscall_nr(address); + + trace_emulate_vsyscall(vsyscall_nr); + + if (vsyscall_nr < 0) { + warn_bad_vsyscall(KERN_WARNING, regs, + "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); + goto sigsegv; + } + + if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { + warn_bad_vsyscall(KERN_WARNING, regs, + "vsyscall with bad stack (exploit attempt?)"); + goto sigsegv; + } + + tsk = current; + + /* + * Check for access_ok violations and find the syscall nr. + * + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and + * 64-bit, so we don't need to special-case it here. For all the + * vsyscalls, NULL means "don't write anything" not "write it at + * address 0". + */ + switch (vsyscall_nr) { + case 0: + if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) || + !write_ok_or_segv(regs->si, sizeof(struct timezone))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_gettimeofday; + break; + + case 1: + if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_time; + break; + + case 2: + if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || + !write_ok_or_segv(regs->si, sizeof(unsigned))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_getcpu; + break; + } + + /* + * Handle seccomp. regs->ip must be the original value. + * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst. + * + * We could optimize the seccomp disabled case, but performance + * here doesn't matter. + */ + regs->orig_ax = syscall_nr; + regs->ax = -ENOSYS; + tmp = secure_computing(); + if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { + warn_bad_vsyscall(KERN_DEBUG, regs, + "seccomp tried to change syscall nr or ip"); + force_exit_sig(SIGSYS); + return true; + } + regs->orig_ax = -1; + if (tmp) + goto do_ret; /* skip requested */ + + /* + * With a real vsyscall, page faults cause SIGSEGV. We want to + * preserve that behavior to make writing exploits harder. + */ + prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; + current->thread.sig_on_uaccess_err = 1; + + ret = -EFAULT; + switch (vsyscall_nr) { + case 0: + /* this decodes regs->di and regs->si on its own */ + ret = __x64_sys_gettimeofday(regs); + break; + + case 1: + /* this decodes regs->di on its own */ + ret = __x64_sys_time(regs); + break; + + case 2: + /* while we could clobber regs->dx, we didn't in the past... */ + orig_dx = regs->dx; + regs->dx = 0; + /* this decodes regs->di, regs->si and regs->dx on its own */ + ret = __x64_sys_getcpu(regs); + regs->dx = orig_dx; + break; + } + + current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; + +check_fault: + if (ret == -EFAULT) { + /* Bad news -- userspace fed a bad pointer to a vsyscall. */ + warn_bad_vsyscall(KERN_INFO, regs, + "vsyscall fault (exploit attempt?)"); + + /* + * If we failed to generate a signal for any reason, + * generate one here. (This should be impossible.) + */ + if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && + !sigismember(&tsk->pending.signal, SIGSEGV))) + goto sigsegv; + + return true; /* Don't emulate the ret. */ + } + + regs->ax = ret; + +do_ret: + /* Emulate a ret instruction. */ + regs->ip = caller; + regs->sp += 8; + return true; + +sigsegv: + force_sig(SIGSEGV); + return true; +} + +/* + * A pseudo VMA to allow ptrace access for the vsyscall page. This only + * covers the 64bit vsyscall page now. 32bit has a real VMA now and does + * not need special handling anymore: + */ +static const char *gate_vma_name(struct vm_area_struct *vma) +{ + return "[vsyscall]"; +} +static const struct vm_operations_struct gate_vma_ops = { + .name = gate_vma_name, +}; +static struct vm_area_struct gate_vma __ro_after_init = { + .vm_start = VSYSCALL_ADDR, + .vm_end = VSYSCALL_ADDR + PAGE_SIZE, + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC, + .vm_ops = &gate_vma_ops, +}; + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ +#ifdef CONFIG_COMPAT + if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL)) + return NULL; +#endif + if (vsyscall_mode == NONE) + return NULL; + return &gate_vma; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = get_gate_vma(mm); + + if (!vma) + return 0; + + return (addr >= vma->vm_start) && (addr < vma->vm_end); +} + +/* + * Use this when you have no reliable mm, typically from interrupt + * context. It is less reliable than using a task's mm and may give + * false positives. + */ +int in_gate_area_no_mm(unsigned long addr) +{ + return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; +} + +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + +void __init map_vsyscall(void) +{ + extern char __vsyscall_page; + unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); + + /* + * For full emulation, the page needs to exist for real. In + * execute-only mode, there is no PTE at all backing the vsyscall + * page. + */ + if (vsyscall_mode == EMULATE) { + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, + PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); + } + + if (vsyscall_mode == XONLY) + gate_vma.vm_flags = VM_EXEC; + + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != + (unsigned long)VSYSCALL_ADDR); +} diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S new file mode 100644 index 000000000..ef2dd1827 --- /dev/null +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * vsyscall_emu_64.S: Vsyscall emulation page + * + * Copyright (c) 2011 Andy Lutomirski + */ + +#include + +#include +#include +#include + +__PAGE_ALIGNED_DATA + .globl __vsyscall_page + .balign PAGE_SIZE, 0xcc + .type __vsyscall_page, @object +__vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall + ret + int3 + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall + ret + int3 + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall + ret + int3 + + .balign 4096, 0xcc + + .size __vsyscall_page, 4096 diff --git a/arch/x86/entry/vsyscall/vsyscall_trace.h b/arch/x86/entry/vsyscall/vsyscall_trace.h new file mode 100644 index 000000000..3c3f9765a --- /dev/null +++ b/arch/x86/entry/vsyscall/vsyscall_trace.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vsyscall + +#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define __VSYSCALL_TRACE_H + +#include + +TRACE_EVENT(emulate_vsyscall, + + TP_PROTO(int nr), + + TP_ARGS(nr), + + TP_STRUCT__entry(__field(int, nr)), + + TP_fast_assign( + __entry->nr = nr; + ), + + TP_printk("nr = %d", __entry->nr) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/ +#define TRACE_INCLUDE_FILE vsyscall_trace +#include diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig new file mode 100644 index 000000000..dabdf3d7b --- /dev/null +++ b/arch/x86/events/Kconfig @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "Performance monitoring" + +config PERF_EVENTS_INTEL_UNCORE + tristate "Intel uncore performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + help + Include support for Intel uncore performance events. These are + available on NehalemEX and more modern processors. + +config PERF_EVENTS_INTEL_RAPL + tristate "Intel/AMD rapl performance events" + depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI + default y + help + Include support for Intel and AMD rapl performance events for power + monitoring on modern processors. + +config PERF_EVENTS_INTEL_CSTATE + tristate "Intel cstate performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + help + Include support for Intel cstate performance events for power + monitoring on modern processors. + +config PERF_EVENTS_AMD_POWER + depends on PERF_EVENTS && CPU_SUP_AMD + tristate "AMD Processor Power Reporting Mechanism" + help + Provide power reporting mechanism support for AMD processors. + Currently, it leverages X86_FEATURE_ACC_POWER + (CPUID Fn8000_0007_EDX[12]) interface to calculate the + average power consumption on Family 15h processors. + +config PERF_EVENTS_AMD_UNCORE + tristate "AMD Uncore performance events" + depends on PERF_EVENTS && CPU_SUP_AMD + default y + help + Include support for AMD uncore performance events for use with + e.g., perf stat -e amd_l3/.../,amd_df/.../. + + To compile this driver as a module, choose M here: the + module will be called 'amd-uncore'. + +config PERF_EVENTS_AMD_BRS + depends on PERF_EVENTS && CPU_SUP_AMD + bool "AMD Zen3 Branch Sampling support" + help + Enable AMD Zen3 branch sampling support (BRS) which samples up to + 16 consecutive taken branches in registers. + +endmenu diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile new file mode 100644 index 000000000..86a76efa8 --- /dev/null +++ b/arch/x86/events/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += core.o probe.o utils.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o +obj-y += amd/ +obj-$(CONFIG_X86_LOCAL_APIC) += msr.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/ +obj-$(CONFIG_CPU_SUP_CENTAUR) += zhaoxin/ +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin/ diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile new file mode 100644 index 000000000..527d947eb --- /dev/null +++ b/arch/x86/events/amd/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CPU_SUP_AMD) += core.o lbr.o +obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o +obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o +obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o +obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o +amd-uncore-objs := uncore.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += iommu.o +endif diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c new file mode 100644 index 000000000..f1bff153d --- /dev/null +++ b/arch/x86/events/amd/brs.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implement support for AMD Fam19h Branch Sampling feature + * Based on specifications published in AMD PPR Fam19 Model 01 + * + * Copyright 2021 Google LLC + * Contributed by Stephane Eranian + */ +#include +#include +#include +#include + +#include "../perf_event.h" + +#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */ + +/* Debug Extension Configuration register layout */ +union amd_debug_extn_cfg { + __u64 val; + struct { + __u64 rsvd0:2, /* reserved */ + brsmen:1, /* branch sample enable */ + rsvd4_3:2,/* reserved - must be 0x3 */ + vb:1, /* valid branches recorded */ + rsvd2:10, /* reserved */ + msroff:4, /* index of next entry to write */ + rsvd3:4, /* reserved */ + pmc:3, /* #PMC holding the sampling event */ + rsvd4:37; /* reserved */ + }; +}; + +static inline unsigned int brs_from(int idx) +{ + return MSR_AMD_SAMP_BR_FROM + 2 * idx; +} + +static inline unsigned int brs_to(int idx) +{ + return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1; +} + +static inline void set_debug_extn_cfg(u64 val) +{ + /* bits[4:3] must always be set to 11b */ + wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); +} + +static inline u64 get_debug_extn_cfg(void) +{ + u64 val; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, val); + return val; +} + +static bool __init amd_brs_detect(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_BRS)) + return false; + + switch (boot_cpu_data.x86) { + case 0x19: /* AMD Fam19h (Zen3) */ + x86_pmu.lbr_nr = 16; + + /* No hardware filtering supported */ + x86_pmu.lbr_sel_map = NULL; + x86_pmu.lbr_sel_mask = 0; + break; + default: + return false; + } + + return true; +} + +/* + * Current BRS implementation does not support branch type or privilege level + * filtering. Therefore, this function simply enforces these limitations. No need for + * a br_sel_map. Software filtering is not supported because it would not correlate well + * with a sampling period. + */ +static int amd_brs_setup_filter(struct perf_event *event) +{ + u64 type = event->attr.branch_sample_type; + + /* No BRS support */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* Can only capture all branches, i.e., no filtering */ + if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY) + return -EINVAL; + + return 0; +} + +static inline int amd_is_brs_event(struct perf_event *e) +{ + return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT; +} + +int amd_brs_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* + * Due to interrupt holding, BRS is not recommended in + * counting mode. + */ + if (!is_sampling_event(event)) + return -EINVAL; + + /* + * Due to the way BRS operates by holding the interrupt until + * lbr_nr entries have been captured, it does not make sense + * to allow sampling on BRS with an event that does not match + * what BRS is capturing, i.e., retired taken branches. + * Otherwise the correlation with the event's period is even + * more loose: + * + * With retired taken branch: + * Effective P = P + 16 + X + * With any other event: + * Effective P = P + Y + X + * + * Where X is the number of taken branches due to interrupt + * skid. Skid is large. + * + * Where Y is the occurences of the event while BRS is + * capturing the lbr_nr entries. + * + * By using retired taken branches, we limit the impact on the + * Y variable. We know it cannot be more than the depth of + * BRS. + */ + if (!amd_is_brs_event(event)) + return -EINVAL; + + /* + * BRS implementation does not work with frequency mode + * reprogramming of the period. + */ + if (event->attr.freq) + return -EINVAL; + /* + * The kernel subtracts BRS depth from period, so it must + * be big enough. + */ + if (event->attr.sample_period <= x86_pmu.lbr_nr) + return -EINVAL; + + /* + * Check if we can allow PERF_SAMPLE_BRANCH_STACK + */ + ret = amd_brs_setup_filter(event); + + /* only set in case of success */ + if (!ret) + event->hw.flags |= PERF_X86_EVENT_AMD_BRS; + + return ret; +} + +/* tos = top of stack, i.e., last valid entry written */ +static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) +{ + /* + * msroff: index of next entry to write so top-of-stack is one off + * if BRS is full then msroff is set back to 0. + */ + return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1; +} + +/* + * make sure we have a sane BRS offset to begin with + * especially with kexec + */ +void amd_brs_reset(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_BRS)) + return; + + /* + * Reset config + */ + set_debug_extn_cfg(0); + + /* + * Mark first entry as poisoned + */ + wrmsrl(brs_to(0), BRS_POISON); +} + +int __init amd_brs_init(void) +{ + if (!amd_brs_detect()) + return -EOPNOTSUPP; + + pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr); + + return 0; +} + +void amd_brs_enable(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* Activate only on first user */ + if (++cpuc->brs_active > 1) + return; + + cfg.val = 0; /* reset all fields */ + cfg.brsmen = 1; /* enable branch sampling */ + + /* Set enable bit */ + set_debug_extn_cfg(cfg.val); +} + +void amd_brs_enable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (cpuc->lbr_users) + amd_brs_enable(); +} + +void amd_brs_disable(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* Check if active (could be disabled via x86_pmu_disable_all()) */ + if (!cpuc->brs_active) + return; + + /* Only disable for last user */ + if (--cpuc->brs_active) + return; + + /* + * Clear the brsmen bit but preserve the others as they contain + * useful state such as vb and msroff + */ + cfg.val = get_debug_extn_cfg(); + + /* + * When coming in on interrupt and BRS is full, then hw will have + * already stopped BRS, no need to issue wrmsr again + */ + if (cfg.brsmen) { + cfg.brsmen = 0; + set_debug_extn_cfg(cfg.val); + } +} + +void amd_brs_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (cpuc->lbr_users) + amd_brs_disable(); +} + +static bool amd_brs_match_plm(struct perf_event *event, u64 to) +{ + int type = event->attr.branch_sample_type; + int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV; + int plm_u = PERF_SAMPLE_BRANCH_USER; + + if (!(type & plm_k) && kernel_ip(to)) + return 0; + + if (!(type & plm_u) && !kernel_ip(to)) + return 0; + + return 1; +} + +/* + * Caller must ensure amd_brs_inuse() is true before calling + * return: + */ +void amd_brs_drain(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event = cpuc->events[0]; + struct perf_branch_entry *br = cpuc->lbr_entries; + union amd_debug_extn_cfg cfg; + u32 i, nr = 0, num, tos, start; + u32 shift = 64 - boot_cpu_data.x86_virt_bits; + + /* + * BRS event forced on PMC0, + * so check if there is an event. + * It is possible to have lbr_users > 0 but the event + * not yet scheduled due to long latency PMU irq + */ + if (!event) + goto empty; + + cfg.val = get_debug_extn_cfg(); + + /* Sanity check [0-x86_pmu.lbr_nr] */ + if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr)) + goto empty; + + /* No valid branch */ + if (cfg.vb == 0) + goto empty; + + /* + * msr.off points to next entry to be written + * tos = most recent entry index = msr.off - 1 + * BRS register buffer saturates, so we know we have + * start < tos and that we have to read from start to tos + */ + start = 0; + tos = amd_brs_get_tos(&cfg); + + num = tos - start + 1; + + /* + * BRS is only one pass (saturation) from MSROFF to depth-1 + * MSROFF wraps to zero when buffer is full + */ + for (i = 0; i < num; i++) { + u32 brs_idx = tos - i; + u64 from, to; + + rdmsrl(brs_to(brs_idx), to); + + /* Entry does not belong to us (as marked by kernel) */ + if (to == BRS_POISON) + break; + + /* + * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved. + * Necessary to generate proper virtual addresses suitable for + * symbolization + */ + to = (u64)(((s64)to << shift) >> shift); + + if (!amd_brs_match_plm(event, to)) + continue; + + rdmsrl(brs_from(brs_idx), from); + + perf_clear_branch_entry_bitfields(br+nr); + + br[nr].from = from; + br[nr].to = to; + + nr++; + } +empty: + /* Record number of sampled branches */ + cpuc->lbr_stack.nr = nr; +} + +/* + * Poison most recent entry to prevent reuse by next task + * required because BRS entry are not tagged by PID + */ +static void amd_brs_poison_buffer(void) +{ + union amd_debug_extn_cfg cfg; + unsigned int idx; + + /* Get current state */ + cfg.val = get_debug_extn_cfg(); + + /* idx is most recently written entry */ + idx = amd_brs_get_tos(&cfg); + + /* Poison target of entry */ + wrmsrl(brs_to(idx), BRS_POISON); +} + +/* + * On context switch in, we need to make sure no samples from previous user + * are left in the BRS. + * + * On ctxswin, sched_in = true, called after the PMU has started + * On ctxswout, sched_in = false, called before the PMU is stopped + */ +void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* no active users */ + if (!cpuc->lbr_users) + return; + + /* + * On context switch in, we need to ensure we do not use entries + * from previous BRS user on that CPU, so we poison the buffer as + * a faster way compared to resetting all entries. + */ + if (sched_in) + amd_brs_poison_buffer(); +} + +/* + * called from ACPI processor_idle.c or acpi_pad.c + * with interrupts disabled + */ +void perf_amd_brs_lopwr_cb(bool lopwr_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* + * on mwait in, we may end up in non C0 state. + * we must disable branch sampling to avoid holding the NMI + * for too long. We disable it in hardware but we + * keep the state in cpuc, so we can re-enable. + * + * The hardware will deliver the NMI if needed when brsmen cleared + */ + if (cpuc->brs_active) { + cfg.val = get_debug_extn_cfg(); + cfg.brsmen = !lopwr_in; + set_debug_extn_cfg(cfg.val); + } +} + +DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); +EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb); + +void __init amd_brs_lopwr_init(void) +{ + static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb); +} diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c new file mode 100644 index 000000000..04f4b96de --- /dev/null +++ b/arch/x86/events/amd/core.c @@ -0,0 +1,1521 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../perf_event.h" + +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; + +/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */ +#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) +#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) + +/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */ +static u64 amd_pmu_global_cntr_mask __read_mostly; + +static __initconst const u64 amd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ + [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ + [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ + [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ + [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst const u64 amd_hw_cache_event_ids_f17h + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ + [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ + [C(RESULT_MISS)] = 0, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ + [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ + [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ + [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ + [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + +/* + * AMD Performance Monitor K7 and later, up to and including Family 16h: + */ +static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ +}; + +/* + * AMD Performance Monitor Family 17h and later: + */ +static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, +}; + +static u64 amd_pmu_event_map(int hw_event) +{ + if (boot_cpu_data.x86 >= 0x17) + return amd_f17h_perfmon_event_map[hw_event]; + + return amd_perfmon_event_map[hw_event]; +} + +/* + * Previously calculated offsets + */ +static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; + +/* + * Legacy CPUs: + * 4 counters starting at 0xc0010000 each offset by 1 + * + * CPUs with core performance counter extensions: + * 6 counters starting at 0xc0010200 each offset by 2 + */ +static inline int amd_pmu_addr_offset(int index, bool eventsel) +{ + int offset; + + if (!index) + return index; + + if (eventsel) + offset = event_offsets[index]; + else + offset = count_offsets[index]; + + if (offset) + return offset; + + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + offset = index; + else + offset = index << 1; + + if (eventsel) + event_offsets[index] = offset; + else + count_offsets[index] = offset; + + return offset; +} + +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + +DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config); + +static int amd_core_hw_config(struct perf_event *event) +{ + if (event->attr.exclude_host && event->attr.exclude_guest) + /* + * When HO == GO == 1 the hardware treats that as GO == HO == 0 + * and will count in both modes. We don't want to count in that + * case so we emulate no-counting by setting US = OS = 0. + */ + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); + else if (event->attr.exclude_host) + event->hw.config |= AMD64_EVENTSEL_GUESTONLY; + else if (event->attr.exclude_guest) + event->hw.config |= AMD64_EVENTSEL_HOSTONLY; + + if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) + event->hw.flags |= PERF_X86_EVENT_PAIR; + + if (has_branch_stack(event)) + return static_call(amd_pmu_branch_hw_config)(event); + + return 0; +} + +static inline int amd_is_nb_event(struct hw_perf_event *hwc) +{ + return (hwc->config & 0xe0) == 0xe0; +} + +static inline int amd_has_nb(struct cpu_hw_events *cpuc) +{ + struct amd_nb *nb = cpuc->amd_nb; + + return nb && nb->nb_id != -1; +} + +static int amd_pmu_hw_config(struct perf_event *event) +{ + int ret; + + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return forward_event_to_ibs(event); + + if (has_branch_stack(event) && !x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + + return amd_core_hw_config(event); +} + +static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct amd_nb *nb = cpuc->amd_nb; + int i; + + /* + * need to scan whole list because event may not have + * been assigned during scheduling + * + * no race condition possible because event can only + * be removed on one CPU at a time AND PMU is disabled + * when we come here + */ + for (i = 0; i < x86_pmu.num_counters; i++) { + if (cmpxchg(nb->owners + i, event, NULL) == event) + break; + } +} + + /* + * AMD64 NorthBridge events need special treatment because + * counter access needs to be synchronized across all cores + * of a package. Refer to BKDG section 3.12 + * + * NB events are events measuring L3 cache, Hypertransport + * traffic. They are identified by an event code >= 0xe00. + * They measure events on the NorthBride which is shared + * by all cores on a package. NB events are counted on a + * shared set of counters. When a NB event is programmed + * in a counter, the data actually comes from a shared + * counter. Thus, access to those counters needs to be + * synchronized. + * + * We implement the synchronization such that no two cores + * can be measuring NB events using the same counters. Thus, + * we maintain a per-NB allocation table. The available slot + * is propagated using the event_constraint structure. + * + * We provide only one choice for each NB event based on + * the fact that only NB events have restrictions. Consequently, + * if a counter is available, there is a guarantee the NB event + * will be assigned to it. If no slot is available, an empty + * constraint is returned and scheduling will eventually fail + * for this event. + * + * Note that all cores attached the same NB compete for the same + * counters to host NB events, this is why we use atomic ops. Some + * multi-chip CPUs may have more than one NB. + * + * Given that resources are allocated (cmpxchg), they must be + * eventually freed for others to use. This is accomplished by + * calling __amd_put_nb_event_constraints() + * + * Non NB events are not impacted by this restriction. + */ +static struct event_constraint * +__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, + struct event_constraint *c) +{ + struct hw_perf_event *hwc = &event->hw; + struct amd_nb *nb = cpuc->amd_nb; + struct perf_event *old; + int idx, new = -1; + + if (!c) + c = &unconstrained; + + if (cpuc->is_fake) + return c; + + /* + * detect if already present, if so reuse + * + * cannot merge with actual allocation + * because of possible holes + * + * event can already be present yet not assigned (in hwc->idx) + * because of successive calls to x86_schedule_events() from + * hw_perf_group_sched_in() without hw_perf_enable() + */ + for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { + if (new == -1 || hwc->idx == idx) + /* assign free slot, prefer hwc->idx */ + old = cmpxchg(nb->owners + idx, NULL, event); + else if (nb->owners[idx] == event) + /* event already present */ + old = event; + else + continue; + + if (old && old != event) + continue; + + /* reassign to this slot */ + if (new != -1) + cmpxchg(nb->owners + new, event, NULL); + new = idx; + + /* already present, reuse */ + if (old == event) + break; + } + + if (new == -1) + return &emptyconstraint; + + return &nb->event_constraints[new]; +} + +static struct amd_nb *amd_alloc_nb(int cpu) +{ + struct amd_nb *nb; + int i; + + nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); + if (!nb) + return NULL; + + nb->nb_id = -1; + + /* + * initialize all possible NB constraints + */ + for (i = 0; i < x86_pmu.num_counters; i++) { + __set_bit(i, nb->event_constraints[i].idxmsk); + nb->event_constraints[i].weight = 1; + } + return nb; +} + +typedef void (amd_pmu_branch_reset_t)(void); +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t); + +static void amd_pmu_cpu_reset(int cpu) +{ + if (x86_pmu.lbr_nr) + static_call(amd_pmu_branch_reset)(); + + if (x86_pmu.version < 2) + return; + + /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); + + /* + * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze + * and PerfCntrGLobalStatus.PerfCntrOvfl + */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); +} + +static int amd_pmu_cpu_prepare(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL, + cpu_to_node(cpu)); + if (!cpuc->lbr_sel) + return -ENOMEM; + + WARN_ON_ONCE(cpuc->amd_nb); + + if (!x86_pmu.amd_nb_constraints) + return 0; + + cpuc->amd_nb = amd_alloc_nb(cpu); + if (cpuc->amd_nb) + return 0; + + kfree(cpuc->lbr_sel); + cpuc->lbr_sel = NULL; + + return -ENOMEM; +} + +static void amd_pmu_cpu_starting(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; + struct amd_nb *nb; + int i, nb_id; + + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; + amd_pmu_cpu_reset(cpu); + + if (!x86_pmu.amd_nb_constraints) + return; + + nb_id = topology_die_id(cpu); + WARN_ON_ONCE(nb_id == BAD_APICID); + + for_each_online_cpu(i) { + nb = per_cpu(cpu_hw_events, i).amd_nb; + if (WARN_ON_ONCE(!nb)) + continue; + + if (nb->nb_id == nb_id) { + *onln = cpuc->amd_nb; + cpuc->amd_nb = nb; + break; + } + } + + cpuc->amd_nb->nb_id = nb_id; + cpuc->amd_nb->refcnt++; +} + +static void amd_pmu_cpu_dead(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + kfree(cpuhw->lbr_sel); + cpuhw->lbr_sel = NULL; + amd_pmu_cpu_reset(cpu); + + if (!x86_pmu.amd_nb_constraints) + return; + + if (cpuhw->amd_nb) { + struct amd_nb *nb = cpuhw->amd_nb; + + if (nb->nb_id == -1 || --nb->refcnt == 0) + kfree(nb); + + cpuhw->amd_nb = NULL; + } +} + +static inline void amd_pmu_set_global_ctl(u64 ctl) +{ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); +} + +static inline u64 amd_pmu_get_global_status(void) +{ + u64 status; + + /* PerfCntrGlobalStatus is read-only */ + rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); + + return status; +} + +static inline void amd_pmu_ack_global_status(u64 status) +{ + /* + * PerfCntrGlobalStatus is read-only but an overflow acknowledgment + * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr + * clears the same bit in PerfCntrGlobalStatus + */ + + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); +} + +static bool amd_pmu_test_overflow_topbit(int idx) +{ + u64 counter; + + rdmsrl(x86_pmu_event_addr(idx), counter); + + return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); +} + +static bool amd_pmu_test_overflow_status(int idx) +{ + return amd_pmu_get_global_status() & BIT_ULL(idx); +} + +DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit); + +/* + * When a PMC counter overflows, an NMI is used to process the event and + * reset the counter. NMI latency can result in the counter being updated + * before the NMI can run, which can result in what appear to be spurious + * NMIs. This function is intended to wait for the NMI to run and reset + * the counter to avoid possible unhandled NMI messages. + */ +#define OVERFLOW_WAIT_COUNT 50 + +static void amd_pmu_wait_on_overflow(int idx) +{ + unsigned int i; + + /* + * Wait for the counter to be reset if it has overflowed. This loop + * should exit very, very quickly, but just in case, don't wait + * forever... + */ + for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { + if (!static_call(amd_pmu_test_overflow)(idx)) + break; + + /* Might be in IRQ context, so can't sleep */ + udelay(1); + } +} + +static void amd_pmu_check_overflow(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + /* + * This shouldn't be called from NMI context, but add a safeguard here + * to return, since if we're in NMI context we can't wait for an NMI + * to reset an overflowed counter value. + */ + if (in_nmi()) + return; + + /* + * Check each counter for overflow and wait for it to be reset by the + * NMI if it has overflowed. This relies on the fact that all active + * counters are always enabled when this function is called and + * ARCH_PERFMON_EVENTSEL_INT is always set. + */ + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + amd_pmu_wait_on_overflow(idx); + } +} + +static void amd_pmu_enable_event(struct perf_event *event) +{ + x86_pmu_enable_event(event); +} + +static void amd_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + amd_brs_enable_all(); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + /* only activate events which are marked as active */ + if (!test_bit(idx, cpuc->active_mask)) + continue; + + amd_pmu_enable_event(cpuc->events[idx]); + } +} + +static void amd_pmu_v2_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * Testing cpu_hw_events.enabled should be skipped in this case unlike + * in x86_pmu_enable_event(). + * + * Since cpu_hw_events.enabled is set only after returning from + * x86_pmu_start(), the PMCs must be programmed and kept ready. + * Counting starts only after x86_pmu_enable_all() is called. + */ + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); +} + +static __always_inline void amd_pmu_core_enable_all(void) +{ + amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); +} + +static void amd_pmu_v2_enable_all(int added) +{ + amd_pmu_lbr_enable_all(); + amd_pmu_core_enable_all(); +} + +static void amd_pmu_disable_event(struct perf_event *event) +{ + x86_pmu_disable_event(event); + + /* + * This can be called from NMI context (via x86_pmu_stop). The counter + * may have overflowed, but either way, we'll never see it get reset + * by the NMI if we're already in the NMI. And the NMI latency support + * below will take care of any pending NMI that might have been + * generated by the overflow. + */ + if (in_nmi()) + return; + + amd_pmu_wait_on_overflow(event->hw.idx); +} + +static void amd_pmu_disable_all(void) +{ + amd_brs_disable_all(); + x86_pmu_disable_all(); + amd_pmu_check_overflow(); +} + +static __always_inline void amd_pmu_core_disable_all(void) +{ + amd_pmu_set_global_ctl(0); +} + +static void amd_pmu_v2_disable_all(void) +{ + amd_pmu_core_disable_all(); + amd_pmu_lbr_disable_all(); + amd_pmu_check_overflow(); +} + +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add); + +static void amd_pmu_add_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + static_call(amd_pmu_branch_add)(event); +} + +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del); + +static void amd_pmu_del_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + static_call(amd_pmu_branch_del)(event); +} + +/* + * Because of NMI latency, if multiple PMC counters are active or other sources + * of NMIs are received, the perf NMI handler can handle one or more overflowed + * PMC counters outside of the NMI associated with the PMC overflow. If the NMI + * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel + * back-to-back NMI support won't be active. This PMC handler needs to take into + * account that this can occur, otherwise this could result in unknown NMI + * messages being issued. Examples of this is PMC overflow while in the NMI + * handler when multiple PMCs are active or PMC overflow while handling some + * other source of an NMI. + * + * Attempt to mitigate this by creating an NMI window in which un-handled NMIs + * received during this window will be claimed. This prevents extending the + * window past when it is possible that latent NMIs should be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if arriving within that window. + */ +static inline int amd_pmu_adjust_nmi_window(int handled) +{ + /* + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. + */ + if (handled) { + this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window); + + return handled; + } + + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) + return NMI_DONE; + + return NMI_HANDLED; +} + +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int handled; + int pmu_enabled; + + /* + * Save the PMU state. + * It needs to be restored when leaving the handler. + */ + pmu_enabled = cpuc->enabled; + cpuc->enabled = 0; + + amd_brs_disable_all(); + + /* Drain BRS is in use (could be inactive) */ + if (cpuc->lbr_users) + amd_brs_drain(); + + /* Process any counter overflows */ + handled = x86_pmu_handle_irq(regs); + + cpuc->enabled = pmu_enabled; + if (pmu_enabled) + amd_brs_enable_all(); + + return amd_pmu_adjust_nmi_window(handled); +} + +static int amd_pmu_v2_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_sample_data data; + struct hw_perf_event *hwc; + struct perf_event *event; + int handled = 0, idx; + u64 reserved, status, mask; + bool pmu_enabled; + + /* + * Save the PMU state as it needs to be restored when leaving the + * handler + */ + pmu_enabled = cpuc->enabled; + cpuc->enabled = 0; + + /* Stop counting but do not disable LBR */ + amd_pmu_core_disable_all(); + + status = amd_pmu_get_global_status(); + + /* Check if any overflows are pending */ + if (!status) + goto done; + + /* Read branch records before unfreezing */ + if (status & GLOBAL_STATUS_LBRS_FROZEN) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } + + reserved = status & ~amd_pmu_global_cntr_mask; + if (reserved) + pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n", + reserved); + + /* Clear any reserved bits set by buggy microcode */ + status &= amd_pmu_global_cntr_mask; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + x86_perf_event_update(event); + mask = BIT_ULL(idx); + + if (!(status & mask)) + continue; + + /* Event overflow */ + handled++; + status &= ~mask; + perf_sample_data_init(&data, 0, hwc->last_period); + + if (!x86_perf_event_set_period(event)) + continue; + + if (has_branch_stack(event)) { + data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * It should never be the case that some overflows are not handled as + * the corresponding PMCs are expected to be inactive according to the + * active_mask + */ + WARN_ON(status > 0); + + /* Clear overflow and freeze bits */ + amd_pmu_ack_global_status(~status); + + /* + * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT + * PMI entry is not set by the local APIC when a PMC overflow occurs + */ + inc_irq_stat(apic_perf_irqs); + +done: + cpuc->enabled = pmu_enabled; + + /* Resume counting only if PMU is active */ + if (pmu_enabled) + amd_pmu_core_enable_all(); + + return amd_pmu_adjust_nmi_window(handled); +} + +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + /* + * if not NB event or no NB, then no constraints + */ + if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) + return &unconstrained; + + return __amd_get_nb_event_constraints(cpuc, event, NULL); +} + +static void amd_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) + __amd_put_nb_event_constraints(cpuc, event); +} + +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *amd_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +/* AMD Family 15h */ + +#define AMD_EVENT_TYPE_MASK 0x000000F0ULL + +#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL +#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL +#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL +#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL +#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL +#define AMD_EVENT_EX_LS 0x000000C0ULL +#define AMD_EVENT_DE 0x000000D0ULL +#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL + +/* + * AMD family 15h event code/PMC mappings: + * + * type = event_code & 0x0F0: + * + * 0x000 FP PERF_CTL[5:3] + * 0x010 FP PERF_CTL[5:3] + * 0x020 LS PERF_CTL[5:0] + * 0x030 LS PERF_CTL[5:0] + * 0x040 DC PERF_CTL[5:0] + * 0x050 DC PERF_CTL[5:0] + * 0x060 CU PERF_CTL[2:0] + * 0x070 CU PERF_CTL[2:0] + * 0x080 IC/DE PERF_CTL[2:0] + * 0x090 IC/DE PERF_CTL[2:0] + * 0x0A0 --- + * 0x0B0 --- + * 0x0C0 EX/LS PERF_CTL[5:0] + * 0x0D0 DE PERF_CTL[2:0] + * 0x0E0 NB NB_PERF_CTL[3:0] + * 0x0F0 NB NB_PERF_CTL[3:0] + * + * Exceptions: + * + * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) + * 0x003 FP PERF_CTL[3] + * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) + * 0x00B FP PERF_CTL[3] + * 0x00D FP PERF_CTL[3] + * 0x023 DE PERF_CTL[2:0] + * 0x02D LS PERF_CTL[3] + * 0x02E LS PERF_CTL[3,0] + * 0x031 LS PERF_CTL[2:0] (**) + * 0x043 CU PERF_CTL[2:0] + * 0x045 CU PERF_CTL[2:0] + * 0x046 CU PERF_CTL[2:0] + * 0x054 CU PERF_CTL[2:0] + * 0x055 CU PERF_CTL[2:0] + * 0x08F IC PERF_CTL[0] + * 0x187 DE PERF_CTL[0] + * 0x188 DE PERF_CTL[0] + * 0x0DB EX PERF_CTL[5:0] + * 0x0DC LS PERF_CTL[5:0] + * 0x0DD LS PERF_CTL[5:0] + * 0x0DE LS PERF_CTL[5:0] + * 0x0DF LS PERF_CTL[5:0] + * 0x1C0 EX PERF_CTL[5:3] + * 0x1D6 EX PERF_CTL[5:0] + * 0x1D8 EX PERF_CTL[5:0] + * + * (*) depending on the umask all FPU counters may be used + * (**) only one unitmask enabled at a time + */ + +static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); +static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); +static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); +static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); +static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); + +static struct event_constraint * +amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned int event_code = amd_get_event_code(hwc); + + switch (event_code & AMD_EVENT_TYPE_MASK) { + case AMD_EVENT_FP: + switch (event_code) { + case 0x000: + if (!(hwc->config & 0x0000F000ULL)) + break; + if (!(hwc->config & 0x00000F00ULL)) + break; + return &amd_f15_PMC3; + case 0x004: + if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) + break; + return &amd_f15_PMC3; + case 0x003: + case 0x00B: + case 0x00D: + return &amd_f15_PMC3; + } + return &amd_f15_PMC53; + case AMD_EVENT_LS: + case AMD_EVENT_DC: + case AMD_EVENT_EX_LS: + switch (event_code) { + case 0x023: + case 0x043: + case 0x045: + case 0x046: + case 0x054: + case 0x055: + return &amd_f15_PMC20; + case 0x02D: + return &amd_f15_PMC3; + case 0x02E: + return &amd_f15_PMC30; + case 0x031: + if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) + return &amd_f15_PMC20; + return &emptyconstraint; + case 0x1C0: + return &amd_f15_PMC53; + default: + return &amd_f15_PMC50; + } + case AMD_EVENT_CU: + case AMD_EVENT_IC_DE: + case AMD_EVENT_DE: + switch (event_code) { + case 0x08F: + case 0x187: + case 0x188: + return &amd_f15_PMC0; + case 0x0DB ... 0x0DF: + case 0x1D6: + case 0x1D8: + return &amd_f15_PMC50; + default: + return &amd_f15_PMC20; + } + case AMD_EVENT_NB: + /* moved to uncore.c */ + return &emptyconstraint; + default: + return &emptyconstraint; + } +} + +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + +static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (is_counter_pair(hwc)) + --cpuc->n_pair; +} + +/* + * Because of the way BRS operates with an inactive and active phases, and + * the link to one counter, it is not possible to have two events using BRS + * scheduled at the same time. There would be an issue with enforcing the + * period of each one and given that the BRS saturates, it would not be possible + * to guarantee correlated content for all events. Therefore, in situations + * where multiple events want to use BRS, the kernel enforces mutual exclusion. + * Exclusion is enforced by chosing only one counter for events using BRS. + * The event scheduling logic will then automatically multiplex the + * events and ensure that at most one event is actively using BRS. + * + * The BRS counter could be any counter, but there is no constraint on Fam19h, + * therefore all counters are equal and thus we pick the first one: PMC0 + */ +static struct event_constraint amd_fam19h_brs_cntr0_constraint = + EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK); + +static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint = + __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR); + +static struct event_constraint * +amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + bool has_brs = has_amd_brs(hwc); + + /* + * In case BRS is used with an event requiring a counter pair, + * the kernel allows it but only on counter 0 & 1 to enforce + * multiplexing requiring to protect BRS in case of multiple + * BRS users + */ + if (amd_is_pair_event_code(hwc)) { + return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint + : &pair_constraint; + } + + if (has_brs) + return &amd_fam19h_brs_cntr0_constraint; + + return &unconstrained; +} + + +static ssize_t amd_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | + (config & AMD64_EVENTSEL_EVENT) >> 24; + + return x86_event_sysfs_show(page, config, event); +} + +static void amd_pmu_limit_period(struct perf_event *event, s64 *left) +{ + /* + * Decrease period by the depth of the BRS feature to get the last N + * taken branches and approximate the desired period + */ + if (has_branch_stack(event) && *left > x86_pmu.lbr_nr) + *left -= x86_pmu.lbr_nr; +} + +static __initconst const struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = amd_pmu_handle_irq, + .disable_all = amd_pmu_disable_all, + .enable_all = amd_pmu_enable_all, + .enable = amd_pmu_enable_event, + .disable = amd_pmu_disable_event, + .hw_config = amd_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .addr_offset = amd_pmu_addr_offset, + .event_map = amd_pmu_event_map, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_counters = AMD64_NUM_COUNTERS, + .add = amd_pmu_add_event, + .del = amd_pmu_del_event, + .cntval_bits = 48, + .cntval_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints, + .put_event_constraints = amd_put_event_constraints, + + .format_attrs = amd_format_attr, + .events_sysfs_show = amd_event_sysfs_show, + + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, + + .amd_nb_constraints = 1, +}; + +static ssize_t branches_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); +} + +static DEVICE_ATTR_RO(branches); + +static struct attribute *amd_pmu_branches_attrs[] = { + &dev_attr_branches.attr, + NULL, +}; + +static umode_t +amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} + +static struct attribute_group group_caps_amd_branches = { + .name = "caps", + .attrs = amd_pmu_branches_attrs, + .is_visible = amd_branches_is_visible, +}; + +#ifdef CONFIG_PERF_EVENTS_AMD_BRS + +EVENT_ATTR_STR(branch-brs, amd_branch_brs, + "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n"); + +static struct attribute *amd_brs_events_attrs[] = { + EVENT_PTR(amd_branch_brs), + NULL, +}; + +static umode_t +amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ? + attr->mode : 0; +} + +static struct attribute_group group_events_amd_brs = { + .name = "events", + .attrs = amd_brs_events_attrs, + .is_visible = amd_brs_is_visible, +}; + +#endif /* CONFIG_PERF_EVENTS_AMD_BRS */ + +static const struct attribute_group *amd_attr_update[] = { + &group_caps_amd_branches, +#ifdef CONFIG_PERF_EVENTS_AMD_BRS + &group_events_amd_brs, +#endif + NULL, +}; + +static int __init amd_core_pmu_init(void) +{ + union cpuid_0x80000022_ebx ebx; + u64 even_ctr_mask = 0ULL; + int i; + + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + return 0; + + /* Avoid calculating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); + + /* + * If core performance counter extensions exists, we must use + * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also + * amd_pmu_addr_offset(). + */ + x86_pmu.eventsel = MSR_F15H_PERF_CTL; + x86_pmu.perfctr = MSR_F15H_PERF_CTR; + x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + + /* Check for Performance Monitoring v2 support */ + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + + /* Update PMU version for later usage */ + x86_pmu.version = 2; + + /* Find the number of available Core PMCs */ + x86_pmu.num_counters = ebx.split.num_core_pmc; + + amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; + + /* Update PMC handling functions */ + x86_pmu.enable_all = amd_pmu_v2_enable_all; + x86_pmu.disable_all = amd_pmu_v2_disable_all; + x86_pmu.enable = amd_pmu_v2_enable_event; + x86_pmu.handle_irq = amd_pmu_v2_handle_irq; + static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status); + } + + /* + * AMD Core perfctr has separate MSRs for the NB events, see + * the amd/uncore.c driver. + */ + x86_pmu.amd_nb_constraints = 0; + + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= BIT_ULL(i); + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; + x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; + x86_pmu.flags |= PMU_FL_PAIR; + } + + /* LBR and BRS are mutually exclusive features */ + if (!amd_pmu_lbr_init()) { + /* LBR requires flushing on context switch */ + x86_pmu.sched_task = amd_pmu_lbr_sched_task; + static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); + static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); + static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + } else if (!amd_brs_init()) { + /* + * BRS requires special event constraints and flushing on ctxsw. + */ + x86_pmu.get_event_constraints = amd_get_event_constraints_f19h; + x86_pmu.sched_task = amd_pmu_brs_sched_task; + x86_pmu.limit_period = amd_pmu_limit_period; + + static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config); + static_call_update(amd_pmu_branch_reset, amd_brs_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_brs_add); + static_call_update(amd_pmu_branch_del, amd_pmu_brs_del); + + /* + * put_event_constraints callback same as Fam17h, set above + */ + + /* branch sampling must be stopped when entering low power */ + amd_brs_lopwr_init(); + } + + x86_pmu.attr_update = amd_attr_update; + + pr_cont("core perfctr, "); + return 0; +} + +__init int amd_pmu_init(void) +{ + int ret; + + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + + x86_pmu = amd_pmu; + + ret = amd_core_pmu_init(); + if (ret) + return ret; + + if (num_possible_cpus() == 1) { + /* + * No point in allocating data structures to serialize + * against other CPUs, when there is only the one CPU. + */ + x86_pmu.amd_nb_constraints = 0; + } + + if (boot_cpu_data.x86 >= 0x17) + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); + else + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + + return 0; +} + +static inline void amd_pmu_reload_virt(void) +{ + if (x86_pmu.version >= 2) { + /* + * Clear global enable bits, reprogram the PERF_CTL + * registers with updated perf_ctr_virt_mask and then + * set global enable bits once again + */ + amd_pmu_v2_disable_all(); + amd_pmu_enable_all(0); + amd_pmu_v2_enable_all(0); + return; + } + + amd_pmu_disable_all(); + amd_pmu_enable_all(0); +} + +void amd_pmu_enable_virt(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + cpuc->perf_ctr_virt_mask = 0; + + /* Reload all events */ + amd_pmu_reload_virt(); +} +EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); + +void amd_pmu_disable_virt(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * We only mask out the Host-only bit so that host-only counting works + * when SVM is disabled. If someone sets up a guest-only counter when + * SVM is disabled the Guest-only bits still gets set and the counter + * will not count anything. + */ + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; + + /* Reload all events */ + amd_pmu_reload_virt(); +} +EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c new file mode 100644 index 000000000..37cbbc5c6 --- /dev/null +++ b/arch/x86/events/amd/ibs.c @@ -0,0 +1,1541 @@ +/* + * Performance events - AMD IBS + * + * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../perf_event.h" + +static u32 ibs_caps; + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) + +#include +#include + +#include +#include + +#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) +#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT + + +/* + * IBS states: + * + * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken + * and any further add()s must fail. + * + * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are + * complicated by the fact that the IBS hardware can send late NMIs (ie. after + * we've cleared the EN bit). + * + * In order to consume these late NMIs we have the STOPPED state, any NMI that + * happens after we've cleared the EN state will clear this bit and report the + * NMI handled (this is fundamentally racy in the face or multiple NMI sources, + * someone else can consume our BIT and our NMI will go unhandled). + * + * And since we cannot set/clear this separate bit together with the EN bit, + * there are races; if we cleared STARTED early, an NMI could land in + * between clearing STARTED and clearing the EN bit (in fact multiple NMIs + * could happen if the period is small enough), and consume our STOPPED bit + * and trigger streams of unhandled NMIs. + * + * If, however, we clear STARTED late, an NMI can hit between clearing the + * EN bit and clearing STARTED, still see STARTED set and process the event. + * If this event will have the VALID bit clear, we bail properly, but this + * is not a given. With VALID set we can end up calling pmu::stop() again + * (the throttle logic) and trigger the WARNs in there. + * + * So what we do is set STOPPING before clearing EN to avoid the pmu::stop() + * nesting, and clear STARTED late, so that we have a well defined state over + * the clearing of the EN bit. + * + * XXX: we could probably be using !atomic bitops for all this. + */ + +enum ibs_states { + IBS_ENABLED = 0, + IBS_STARTED = 1, + IBS_STOPPING = 2, + IBS_STOPPED = 3, + + IBS_MAX_STATES, +}; + +struct cpu_perf_ibs { + struct perf_event *event; + unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; +}; + +struct perf_ibs { + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + unsigned int fetch_count_reset_broken : 1; + unsigned int fetch_ignore_if_zero_rip : 1; + struct cpu_perf_ibs __percpu *pcpu; + + u64 (*get_count)(u64 config); +}; + +static int +perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) +{ + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int overflow = 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left < (s64)min)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + /* + * If the hw period that triggers the sw overflow is too short + * we might hit the irq handler. This biases the results. + * Thus we shorten the next-to-last period and set the last + * period to the max period. + */ + if (left > max) { + left -= max; + if (left > max) + left = max; + else if (left < min) + left = min; + } + + *hw_period = (u64)left; + + return overflow; +} + +static int +perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - width; + u64 prev_raw_count; + u64 delta; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ + prev_raw_count = local64_read(&hwc->prev_count); + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + return 0; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return 1; +} + +static struct perf_ibs perf_ibs_fetch; +static struct perf_ibs perf_ibs_op; + +static struct perf_ibs *get_ibs_pmu(int type) +{ + if (perf_ibs_fetch.pmu.type == type) + return &perf_ibs_fetch; + if (perf_ibs_op.pmu.type == type) + return &perf_ibs_op; + return NULL; +} + +/* + * core pmu config -> IBS config + * + * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count + * perf record -a -e r076:p ... # same as -e cpu-cycles:p + * perf record -a -e r0C1:p ... # use ibs op counting micro-ops + * + * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, + * MSRC001_1033) is used to select either cycle or micro-ops counting + * mode. + */ +static int core_pmu_ibs_config(struct perf_event *event, u64 *config) +{ + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + switch (event->attr.config) { + case PERF_COUNT_HW_CPU_CYCLES: + *config = 0; + return 0; + } + break; + case PERF_TYPE_RAW: + switch (event->attr.config) { + case 0x0076: + *config = 0; + return 0; + case 0x00C1: + *config = IBS_OP_CNT_CTL; + return 0; + } + break; + default: + return -ENOENT; + } + + return -EOPNOTSUPP; +} + +/* + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. + */ +int forward_event_to_ibs(struct perf_event *event) +{ + u64 config = 0; + + if (!event->attr.precise_ip || event->attr.precise_ip > 2) + return -EOPNOTSUPP; + + if (!core_pmu_ibs_config(event, &config)) { + event->attr.type = perf_ibs_op.pmu.type; + event->attr.config = config; + } + return -ENOENT; +} + +static int perf_ibs_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs; + u64 max_cnt, config; + + perf_ibs = get_ibs_pmu(event->attr.type); + if (!perf_ibs) + return -ENOENT; + + config = event->attr.config; + + if (event->pmu != &perf_ibs->pmu) + return -ENOENT; + + if (config & ~perf_ibs->config_mask) + return -EINVAL; + + if (hwc->sample_period) { + if (config & perf_ibs->cnt_mask) + /* raw max_cnt may not be set */ + return -EINVAL; + if (!event->attr.sample_freq && hwc->sample_period & 0x0f) + /* + * lower 4 bits can not be set in ibs max cnt, + * but allowing it in case we adjust the + * sample period to set a frequency. + */ + return -EINVAL; + hwc->sample_period &= ~0x0FULL; + if (!hwc->sample_period) + hwc->sample_period = 0x10; + } else { + max_cnt = config & perf_ibs->cnt_mask; + config &= ~perf_ibs->cnt_mask; + event->attr.sample_period = max_cnt << 4; + hwc->sample_period = event->attr.sample_period; + } + + if (!hwc->sample_period) + return -EINVAL; + + /* + * If we modify hwc->sample_period, we also need to update + * hwc->last_period and hwc->period_left. + */ + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + + hwc->config_base = perf_ibs->msr; + hwc->config = config; + + return 0; +} + +static int perf_ibs_set_period(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 *period) +{ + int overflow; + + /* ignore lower 4 bits in min count: */ + overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + local64_set(&hwc->prev_count, 0); + + return overflow; +} + +static u64 get_ibs_fetch_count(u64 config) +{ + union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config; + + return fetch_ctl.fetch_cnt << 4; +} + +static u64 get_ibs_op_count(u64 config) +{ + union ibs_op_ctl op_ctl = (union ibs_op_ctl)config; + u64 count = 0; + + /* + * If the internal 27-bit counter rolled over, the count is MaxCnt + * and the lower 7 bits of CurCnt are randomized. + * Otherwise CurCnt has the full 27-bit current counter value. + */ + if (op_ctl.op_val) { + count = op_ctl.opmaxcnt << 4; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + count += op_ctl.opmaxcnt_ext << 20; + } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { + count = op_ctl.opcurcnt; + } + + return count; +} + +static void +perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, + u64 *config) +{ + u64 count = perf_ibs->get_count(*config); + + /* + * Set width to 64 since we do not overflow on max width but + * instead on max count. In perf_ibs_set_period() we clear + * prev count manually on overflow. + */ + while (!perf_event_try_update(event, count, 64)) { + rdmsrl(event->hw.config_base, *config); + count = perf_ibs->get_count(*config); + } +} + +static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + u64 tmp = hwc->config | config; + + if (perf_ibs->fetch_count_reset_broken) + wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + + wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); +} + +/* + * Erratum #420 Instruction-Based Sampling Engine May Generate + * Interrupt that Cannot Be Cleared: + * + * Must clear counter mask first, then clear the enable bit. See + * Revision Guide for AMD Family 10h Processors, Publication #41322. + */ +static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + config &= ~perf_ibs->cnt_mask; + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); + config &= ~perf_ibs->enable_mask; + wrmsrl(hwc->config_base, config); +} + +/* + * We cannot restore the ibs pmu state, so we always needs to update + * the event while stopping it and then reset the state when starting + * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in + * perf_ibs_start()/perf_ibs_stop() and instead always do it. + */ +static void perf_ibs_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + u64 period, config = 0; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + perf_ibs_set_period(perf_ibs, hwc, &period); + if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { + config |= period & IBS_OP_MAX_CNT_EXT_MASK; + period &= ~IBS_OP_MAX_CNT_EXT_MASK; + } + config |= period >> 4; + + /* + * Set STARTED before enabling the hardware, such that a subsequent NMI + * must observe it. + */ + set_bit(IBS_STARTED, pcpu->state); + clear_bit(IBS_STOPPING, pcpu->state); + perf_ibs_enable_event(perf_ibs, hwc, config); + + perf_event_update_userpage(event); +} + +static void perf_ibs_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + u64 config; + int stopping; + + if (test_and_set_bit(IBS_STOPPING, pcpu->state)) + return; + + stopping = test_bit(IBS_STARTED, pcpu->state); + + if (!stopping && (hwc->state & PERF_HES_UPTODATE)) + return; + + rdmsrl(hwc->config_base, config); + + if (stopping) { + /* + * Set STOPPED before disabling the hardware, such that it + * must be visible to NMIs the moment we clear the EN bit, + * at which point we can generate an !VALID sample which + * we need to consume. + */ + set_bit(IBS_STOPPED, pcpu->state); + perf_ibs_disable_event(perf_ibs, hwc, config); + /* + * Clear STARTED after disabling the hardware; if it were + * cleared before an NMI hitting after the clear but before + * clearing the EN bit might think it a spurious NMI and not + * handle it. + * + * Clearing it after, however, creates the problem of the NMI + * handler seeing STARTED but not having a valid sample. + */ + clear_bit(IBS_STARTED, pcpu->state); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* + * Clear valid bit to not count rollovers on update, rollovers + * are only updated in the irq handler. + */ + config &= ~perf_ibs->valid_mask; + + perf_ibs_event_update(perf_ibs, event, &config); + hwc->state |= PERF_HES_UPTODATE; +} + +static int perf_ibs_add(struct perf_event *event, int flags) +{ + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + + if (test_and_set_bit(IBS_ENABLED, pcpu->state)) + return -ENOSPC; + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + pcpu->event = event; + + if (flags & PERF_EF_START) + perf_ibs_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void perf_ibs_del(struct perf_event *event, int flags) +{ + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + + if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) + return; + + perf_ibs_stop(event, PERF_EF_UPDATE); + + pcpu->event = NULL; + + perf_event_update_userpage(event); +} + +static void perf_ibs_read(struct perf_event *event) { } + +/* + * We need to initialize with empty group if all attributes in the + * group are dynamic. + */ +static struct attribute *attrs_empty[] = { + NULL, +}; + +static struct attribute_group empty_format_group = { + .name = "format", + .attrs = attrs_empty, +}; + +static struct attribute_group empty_caps_group = { + .name = "caps", + .attrs = attrs_empty, +}; + +static const struct attribute_group *empty_attr_groups[] = { + &empty_format_group, + &empty_caps_group, + NULL, +}; + +PMU_FORMAT_ATTR(rand_en, "config:57"); +PMU_FORMAT_ATTR(cnt_ctl, "config:19"); +PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); +PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); +PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); + +static umode_t +zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; +} + +static struct attribute *rand_en_attrs[] = { + &format_attr_rand_en.attr, + NULL, +}; + +static struct attribute *fetch_l3missonly_attrs[] = { + &fetch_l3missonly.attr.attr, + NULL, +}; + +static struct attribute *zen4_ibs_extensions_attrs[] = { + &zen4_ibs_extensions.attr.attr, + NULL, +}; + +static struct attribute_group group_rand_en = { + .name = "format", + .attrs = rand_en_attrs, +}; + +static struct attribute_group group_fetch_l3missonly = { + .name = "format", + .attrs = fetch_l3missonly_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static struct attribute_group group_zen4_ibs_extensions = { + .name = "caps", + .attrs = zen4_ibs_extensions_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static const struct attribute_group *fetch_attr_groups[] = { + &group_rand_en, + &empty_caps_group, + NULL, +}; + +static const struct attribute_group *fetch_attr_update[] = { + &group_fetch_l3missonly, + &group_zen4_ibs_extensions, + NULL, +}; + +static umode_t +cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; +} + +static struct attribute *cnt_ctl_attrs[] = { + &format_attr_cnt_ctl.attr, + NULL, +}; + +static struct attribute *op_l3missonly_attrs[] = { + &op_l3missonly.attr.attr, + NULL, +}; + +static struct attribute_group group_cnt_ctl = { + .name = "format", + .attrs = cnt_ctl_attrs, + .is_visible = cnt_ctl_is_visible, +}; + +static struct attribute_group group_op_l3missonly = { + .name = "format", + .attrs = op_l3missonly_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static const struct attribute_group *op_attr_update[] = { + &group_cnt_ctl, + &group_op_l3missonly, + &group_zen4_ibs_extensions, + NULL, +}; + +static struct perf_ibs perf_ibs_fetch = { + .pmu = { + .task_ctx_nr = perf_invalid_context, + + .event_init = perf_ibs_init, + .add = perf_ibs_add, + .del = perf_ibs_del, + .start = perf_ibs_start, + .stop = perf_ibs_stop, + .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }, + .msr = MSR_AMD64_IBSFETCHCTL, + .config_mask = IBS_FETCH_CONFIG_MASK, + .cnt_mask = IBS_FETCH_MAX_CNT, + .enable_mask = IBS_FETCH_ENABLE, + .valid_mask = IBS_FETCH_VAL, + .max_period = IBS_FETCH_MAX_CNT << 4, + .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, + .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, + + .get_count = get_ibs_fetch_count, +}; + +static struct perf_ibs perf_ibs_op = { + .pmu = { + .task_ctx_nr = perf_invalid_context, + + .event_init = perf_ibs_init, + .add = perf_ibs_add, + .del = perf_ibs_del, + .start = perf_ibs_start, + .stop = perf_ibs_stop, + .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }, + .msr = MSR_AMD64_IBSOPCTL, + .config_mask = IBS_OP_CONFIG_MASK, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, + .enable_mask = IBS_OP_ENABLE, + .valid_mask = IBS_OP_VAL, + .max_period = IBS_OP_MAX_CNT << 4, + .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, + .offset_max = MSR_AMD64_IBSOP_REG_COUNT, + + .get_count = get_ibs_op_count, +}; + +static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_op = PERF_MEM_OP_NA; + + if (op_data3->ld_op) + data_src->mem_op = PERF_MEM_OP_LOAD; + else if (op_data3->st_op) + data_src->mem_op = PERF_MEM_OP_STORE; +} + +/* + * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has + * more fine granular DataSrc encodings. Others have coarse. + */ +static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) +{ + if (ibs_caps & IBS_CAPS_ZEN4) + return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; + + return op_data2->data_src_lo; +} + +static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src = perf_ibs_data_src(op_data2); + + data_src->mem_lvl = 0; + + /* + * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached + * memory accesses. So, check DcUcMemAcc bit early. + */ + if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; + return; + } + + /* L1 Hit */ + if (op_data3->dc_miss == 0) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + return; + } + + /* L2 Hit */ + if (op_data3->l2_miss == 0) { + /* Erratum #1293 */ + if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || + !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* + * OP_DATA2 is valid only for load ops. Skip all checks which + * uses OP_DATA2[DataSrc]. + */ + if (data_src->mem_op != PERF_MEM_OP_LOAD) + goto check_mab; + + /* L3 Hit */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 | + PERF_MEM_LVL_HIT; + return; + } + } + + /* A peer cache in a near CCX */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + return; + } + + /* A peer cache in a far CCX */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* DRAM */ + if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { + if (op_data2->rmt_node == 0) + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + else + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + return; + } + + /* PMEM */ + if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* Extension Memory */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* IO */ + if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_IO; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + +check_mab: + /* + * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding + * DC misses. However, such data may come from any level in mem + * hierarchy. IBS provides detail about both MAB as well as actual + * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set + * MAB only when IBS fails to provide DataSrc. + */ + if (op_data3->dc_miss_no_mab_alloc) { + data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; + return; + } + + data_src->mem_lvl = PERF_MEM_LVL_NA; +} + +static bool perf_ibs_cache_hit_st_valid(void) +{ + /* 0: Uninitialized, 1: Valid, -1: Invalid */ + static int cache_hit_st_valid; + + if (unlikely(!cache_hit_st_valid)) { + if (boot_cpu_data.x86 == 0x19 && + (boot_cpu_data.x86_model <= 0xF || + (boot_cpu_data.x86_model >= 0x20 && + boot_cpu_data.x86_model <= 0x5F))) { + cache_hit_st_valid = -1; + } else { + cache_hit_st_valid = 1; + } + } + + return cache_hit_st_valid == 1; +} + +static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src; + + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + + if (!perf_ibs_cache_hit_st_valid() || + data_src->mem_op != PERF_MEM_OP_LOAD || + data_src->mem_lvl & PERF_MEM_LVL_L1 || + data_src->mem_lvl & PERF_MEM_LVL_L2 || + op_data2->cache_hit_st) + return; + + ibs_data_src = perf_ibs_data_src(op_data2); + + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } +} + +static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_dtlb = PERF_MEM_TLB_NA; + + if (!op_data3->dc_lin_addr_valid) + return; + + if (!op_data3->dc_l1tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; + return; + } + + if (!op_data3->dc_l2tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; + return; + } + + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; +} + +static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_lock = PERF_MEM_LOCK_NA; + + if (op_data3->dc_locked_op) + data_src->mem_lock = PERF_MEM_LOCK_LOCKED; +} + +#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) + +static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, + struct perf_sample_data *data, + union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3) +{ + perf_ibs_get_mem_lvl(op_data2, op_data3, data); + perf_ibs_get_mem_snoop(op_data2, data); + perf_ibs_get_tlb_lvl(op_data3, data); + perf_ibs_get_mem_lock(op_data3, data); +} + +static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data, + union ibs_op_data3 *op_data3) +{ + __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; + + /* Erratum #1293 */ + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF && + (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + /* + * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode. + * DataSrc=0 is 'No valid status' and RmtNode is invalid when + * DataSrc=0. + */ + val = 0; + } + return val; +} + +static void perf_ibs_parse_ld_st_data(__u64 sample_type, + struct perf_ibs_data *ibs_data, + struct perf_sample_data *data) +{ + union ibs_op_data3 op_data3; + union ibs_op_data2 op_data2; + union ibs_op_data op_data; + + data->data_src.val = PERF_MEM_NA; + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + + perf_ibs_get_mem_op(&op_data3, data); + if (data->data_src.mem_op != PERF_MEM_OP_LOAD && + data->data_src.mem_op != PERF_MEM_OP_STORE) + return; + + op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3); + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss && + data->data_src.mem_op == PERF_MEM_OP_LOAD) { + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight.var1_dw = op_data3.dc_miss_lat; + data->weight.var2_w = op_data.tag_to_ret_ctr; + } else if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = op_data3.dc_miss_lat; + } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) { + data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) { + data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; + data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; + } +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, + int check_rip) +{ + if (sample_type & PERF_SAMPLE_RAW || + (perf_ibs == &perf_ibs_op && + (sample_type & PERF_SAMPLE_DATA_SRC || + sample_type & PERF_SAMPLE_WEIGHT_TYPE || + sample_type & PERF_SAMPLE_ADDR || + sample_type & PERF_SAMPLE_PHYS_ADDR))) + return perf_ibs->offset_max; + else if (check_rip) + return 3; + return 1; +} + +static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) +{ + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + struct perf_event *event = pcpu->event; + struct hw_perf_event *hwc; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + struct perf_ibs_data ibs_data; + int offset, size, check_rip, offset_max, throttle = 0; + unsigned int msr; + u64 *buf, *config, period, new_config = 0; + + if (!test_bit(IBS_STARTED, pcpu->state)) { +fail: + /* + * Catch spurious interrupts after stopping IBS: After + * disabling IBS there could be still incoming NMIs + * with samples that even have the valid bit cleared. + * Mark all this NMIs as handled. + */ + if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) + return 1; + + return 0; + } + + if (WARN_ON_ONCE(!event)) + goto fail; + + hwc = &event->hw; + msr = hwc->config_base; + buf = ibs_data.regs; + rdmsrl(msr, *buf); + if (!(*buf++ & perf_ibs->valid_mask)) + goto fail; + + config = &ibs_data.regs[0]; + perf_ibs_event_update(perf_ibs, event, config); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!perf_ibs_set_period(perf_ibs, hwc, &period)) + goto out; /* no sw counter overflow */ + + ibs_data.caps = ibs_caps; + size = 1; + offset = 1; + check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); + + offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + + do { + rdmsrl(msr + offset, *buf++); + size++; + offset = find_next_bit(perf_ibs->offset_mask, + perf_ibs->offset_max, + offset + 1); + } while (offset < offset_max); + /* + * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately + * depending on their availability. + * Can't add to offset_max as they are staggered + */ + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_BRNTRGT) { + rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + size++; + } + if (ibs_caps & IBS_CAPS_OPDATA4) { + rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + size++; + } + } + if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { + rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); + size++; + } + } + ibs_data.size = sizeof(u64) * size; + + regs = *iregs; + if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { + regs.flags &= ~PERF_EFLAGS_EXACT; + } else { + /* Workaround for erratum #1197 */ + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) + goto out; + + set_linear_ip(®s, ibs_data.regs[1]); + regs.flags |= PERF_EFLAGS_EXACT; + } + + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw = (struct perf_raw_record){ + .frag = { + .size = sizeof(u32) + ibs_data.size, + .data = ibs_data.data, + }, + }; + data.raw = &raw; + data.sample_flags |= PERF_SAMPLE_RAW; + } + + if (perf_ibs == &perf_ibs_op) + perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); + + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + data.callchain = perf_callchain(event, iregs); + data.sample_flags |= PERF_SAMPLE_CALLCHAIN; + } + + throttle = perf_event_overflow(event, &data, ®s); +out: + if (throttle) { + perf_ibs_stop(event, 0); + } else { + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_OPCNTEXT) { + new_config = period & IBS_OP_MAX_CNT_EXT_MASK; + period &= ~IBS_OP_MAX_CNT_EXT_MASK; + } + if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL)) + new_config |= *config & IBS_OP_CUR_CNT_RAND; + } + new_config |= period >> 4; + + perf_ibs_enable_event(perf_ibs, hwc, new_config); + } + + perf_event_update_userpage(event); + + return 1; +} + +static int +perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) +{ + u64 stamp = sched_clock(); + int handled = 0; + + handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); + handled += perf_ibs_handle_irq(&perf_ibs_op, regs); + + if (handled) + inc_irq_stat(apic_perf_irqs); + + perf_sample_event_took(sched_clock() - stamp); + + return handled; +} +NOKPROBE_SYMBOL(perf_ibs_nmi_handler); + +static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) +{ + struct cpu_perf_ibs __percpu *pcpu; + int ret; + + pcpu = alloc_percpu(struct cpu_perf_ibs); + if (!pcpu) + return -ENOMEM; + + perf_ibs->pcpu = pcpu; + + ret = perf_pmu_register(&perf_ibs->pmu, name, -1); + if (ret) { + perf_ibs->pcpu = NULL; + free_percpu(pcpu); + } + + return ret; +} + +static __init int perf_ibs_fetch_init(void) +{ + /* + * Some chips fail to reset the fetch count when it is written; instead + * they need a 0-1 transition of IbsFetchEn. + */ + if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) + perf_ibs_fetch.fetch_count_reset_broken = 1; + + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) + perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; + + if (ibs_caps & IBS_CAPS_ZEN4) + perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY; + + perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups; + perf_ibs_fetch.pmu.attr_update = fetch_attr_update; + + return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); +} + +static __init int perf_ibs_op_init(void) +{ + if (ibs_caps & IBS_CAPS_OPCNT) + perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; + + if (ibs_caps & IBS_CAPS_OPCNTEXT) { + perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; + } + + if (ibs_caps & IBS_CAPS_ZEN4) + perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; + + perf_ibs_op.pmu.attr_groups = empty_attr_groups; + perf_ibs_op.pmu.attr_update = op_attr_update; + + return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); +} + +static __init int perf_event_ibs_init(void) +{ + int ret; + + ret = perf_ibs_fetch_init(); + if (ret) + return ret; + + ret = perf_ibs_op_init(); + if (ret) + goto err_op; + + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ret) + goto err_nmi; + + pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); + return 0; + +err_nmi: + perf_pmu_unregister(&perf_ibs_op.pmu); + free_percpu(perf_ibs_op.pcpu); + perf_ibs_op.pcpu = NULL; +err_op: + perf_pmu_unregister(&perf_ibs_fetch.pmu); + free_percpu(perf_ibs_fetch.pcpu); + perf_ibs_fetch.pcpu = NULL; + + return ret; +} + +#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ + +static __init int perf_event_ibs_init(void) +{ + return 0; +} + +#endif + +/* IBS - apic initialization, for perf and oprofile */ + +static __init u32 __get_ibs_caps(void) +{ + u32 caps; + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_DEFAULT; + + caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_DEFAULT; + + return caps; +} + +u32 get_ibs_caps(void) +{ + return ibs_caps; +} + +EXPORT_SYMBOL(get_ibs_caps); + +static inline int get_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); +} + +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + +/* + * Check and reserve APIC extended interrupt LVT offset for IBS if available. + */ +static inline int ibs_eilvt_valid(void) +{ + int offset; + u64 val; + int valid = 0; + + preempt_disable(); + + rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + if (!get_eilvt(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + valid = 1; +out: + preempt_enable(); + + return valid; +} + +static int setup_ibs_ctl(int ibs_eilvt_off) +{ + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVT_OFFSET_VALID); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { + pci_dev_put(cpu_cfg); + pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", + value); + return -EINVAL; + } + } while (1); + + if (!nodes) { + pr_debug("No CPU node configured for IBS\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that + * is using the new offset. + */ +static void force_ibs_eilvt_setup(void) +{ + int offset; + int ret; + + preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; + } + preempt_enable(); + + if (offset == APIC_EILVT_NR_MAX) { + pr_debug("No EILVT entry available\n"); + return; + } + + ret = setup_ibs_ctl(offset); + if (ret) + goto out; + + if (!ibs_eilvt_valid()) + goto out; + + pr_info("LVT offset %d assigned\n", offset); + + return; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return; +} + +static void ibs_eilvt_setup(void) +{ + /* + * Force LVT offset assignment for family 10h: The offsets are + * not assigned by the BIOS for this family, so the OS is + * responsible for doing it. If the OS assignment fails, fall + * back to BIOS settings and try to setup this. + */ + if (boot_cpu_data.x86 == 0x10) + force_ibs_eilvt_setup(); +} + +static inline int get_ibs_lvt_offset(void) +{ + u64 val; + + rdmsrl(MSR_AMD64_IBSCTL, val); + if (!(val & IBSCTL_LVT_OFFSET_VALID)) + return -EINVAL; + + return val & IBSCTL_LVT_OFFSET_MASK; +} + +static void setup_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset < 0) + goto failed; + + if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) + return; +failed: + pr_warn("perf: IBS APIC setup failed on cpu #%d\n", + smp_processor_id()); +} + +static void clear_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset >= 0) + setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); +} + +static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) +{ + setup_APIC_ibs(); + return 0; +} + +#ifdef CONFIG_PM + +static int perf_ibs_suspend(void) +{ + clear_APIC_ibs(); + return 0; +} + +static void perf_ibs_resume(void) +{ + ibs_eilvt_setup(); + setup_APIC_ibs(); +} + +static struct syscore_ops perf_ibs_syscore_ops = { + .resume = perf_ibs_resume, + .suspend = perf_ibs_suspend, +}; + +static void perf_ibs_pm_init(void) +{ + register_syscore_ops(&perf_ibs_syscore_ops); +} + +#else + +static inline void perf_ibs_pm_init(void) { } + +#endif + +static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu) +{ + clear_APIC_ibs(); + return 0; +} + +static __init int amd_ibs_init(void) +{ + u32 caps; + + caps = __get_ibs_caps(); + if (!caps) + return -ENODEV; /* ibs not supported by the cpu */ + + ibs_eilvt_setup(); + + if (!ibs_eilvt_valid()) + return -EINVAL; + + perf_ibs_pm_init(); + + ibs_caps = caps; + /* make ibs_caps visible to other cpus: */ + smp_mb(); + /* + * x86_pmu_amd_ibs_starting_cpu will be called from core on + * all online cpus. + */ + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, + "perf/x86/amd/ibs:starting", + x86_pmu_amd_ibs_starting_cpu, + x86_pmu_amd_ibs_dying_cpu); + + return perf_event_ibs_init(); +} + +/* Since we need the pci subsystem to init ibs we can't do this earlier: */ +device_initcall(amd_ibs_init); diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c new file mode 100644 index 000000000..b15f7b950 --- /dev/null +++ b/arch/x86/events/amd/iommu.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Steven Kinney + * Author: Suravee Suthikulpanit + * + * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation + */ + +#define pr_fmt(fmt) "perf/amd_iommu: " fmt + +#include +#include +#include +#include +#include + +#include "../perf_event.h" +#include "iommu.h" + +/* iommu pmu conf masks */ +#define GET_CSOURCE(x) ((x)->conf & 0xFFULL) +#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) +#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL) +#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL) + +/* iommu pmu conf1 masks */ +#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL) +#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) +#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) + +#define IOMMU_NAME_SIZE 16 + +struct perf_amd_iommu { + struct list_head list; + struct pmu pmu; + struct amd_iommu *iommu; + char name[IOMMU_NAME_SIZE]; + u8 max_banks; + u8 max_counters; + u64 cntr_assign_mask; + raw_spinlock_t lock; +}; + +static LIST_HEAD(perf_amd_iommu_list); + +/*--------------------------------------------- + * sysfs format attributes + *---------------------------------------------*/ +PMU_FORMAT_ATTR(csource, "config:0-7"); +PMU_FORMAT_ATTR(devid, "config:8-23"); +PMU_FORMAT_ATTR(domid, "config:24-39"); +PMU_FORMAT_ATTR(pasid, "config:40-59"); +PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); +PMU_FORMAT_ATTR(domid_mask, "config1:16-31"); +PMU_FORMAT_ATTR(pasid_mask, "config1:32-51"); + +static struct attribute *iommu_format_attrs[] = { + &format_attr_csource.attr, + &format_attr_devid.attr, + &format_attr_pasid.attr, + &format_attr_domid.attr, + &format_attr_devid_mask.attr, + &format_attr_pasid_mask.attr, + &format_attr_domid_mask.attr, + NULL, +}; + +static struct attribute_group amd_iommu_format_group = { + .name = "format", + .attrs = iommu_format_attrs, +}; + +/*--------------------------------------------- + * sysfs events attributes + *---------------------------------------------*/ +static struct attribute_group amd_iommu_events_group = { + .name = "events", +}; + +struct amd_iommu_event_desc { + struct device_attribute attr; + const char *event; +}; + +static ssize_t _iommu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amd_iommu_event_desc *event = + container_of(attr, struct amd_iommu_event_desc, attr); + return sprintf(buf, "%s\n", event->event); +} + +#define AMD_IOMMU_EVENT_DESC(_name, _event) \ +{ \ + .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \ + .event = _event, \ +} + +static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = { + AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"), + AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"), + AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"), + AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"), + AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"), + AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"), + AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"), + AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"), + AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"), + AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"), + AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"), + AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"), + AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h, "csource=0x14"), + AMD_IOMMU_EVENT_DESC(vapic_int_non_guest, "csource=0x15"), + AMD_IOMMU_EVENT_DESC(vapic_int_guest, "csource=0x16"), + AMD_IOMMU_EVENT_DESC(smi_recv, "csource=0x17"), + AMD_IOMMU_EVENT_DESC(smi_blk, "csource=0x18"), + { /* end: all zeroes */ }, +}; + +/*--------------------------------------------- + * sysfs cpumask attributes + *---------------------------------------------*/ +static cpumask_t iommu_cpumask; + +static ssize_t _iommu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask); +} +static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL); + +static struct attribute *iommu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group amd_iommu_cpumask_group = { + .attrs = iommu_cpumask_attrs, +}; + +/*---------------------------------------------*/ + +static int get_next_avail_iommu_bnk_cntr(struct perf_event *event) +{ + struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu); + int max_cntrs = piommu->max_counters; + int max_banks = piommu->max_banks; + u32 shift, bank, cntr; + unsigned long flags; + int retval; + + raw_spin_lock_irqsave(&piommu->lock, flags); + + for (bank = 0; bank < max_banks; bank++) { + for (cntr = 0; cntr < max_cntrs; cntr++) { + shift = bank + (bank*3) + cntr; + if (piommu->cntr_assign_mask & BIT_ULL(shift)) { + continue; + } else { + piommu->cntr_assign_mask |= BIT_ULL(shift); + event->hw.iommu_bank = bank; + event->hw.iommu_cntr = cntr; + retval = 0; + goto out; + } + } + } + retval = -ENOSPC; +out: + raw_spin_unlock_irqrestore(&piommu->lock, flags); + return retval; +} + +static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, + u8 bank, u8 cntr) +{ + unsigned long flags; + int max_banks, max_cntrs; + int shift = 0; + + max_banks = perf_iommu->max_banks; + max_cntrs = perf_iommu->max_counters; + + if ((bank > max_banks) || (cntr > max_cntrs)) + return -EINVAL; + + shift = bank + cntr + (bank*3); + + raw_spin_lock_irqsave(&perf_iommu->lock, flags); + perf_iommu->cntr_assign_mask &= ~(1ULL<lock, flags); + + return 0; +} + +static int perf_iommu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* test the event attr type check for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * IOMMU counters are shared across all cores. + * Therefore, it does not support per-process mode. + * Also, it does not support event sampling mode. + */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + /* update the hw_perf_event struct with the iommu config data */ + hwc->conf = event->attr.config; + hwc->conf1 = event->attr.config1; + + return 0; +} + +static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev) +{ + return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu; +} + +static void perf_iommu_enable_event(struct perf_event *ev) +{ + struct amd_iommu *iommu = perf_event_2_iommu(ev); + struct hw_perf_event *hwc = &ev->hw; + u8 bank = hwc->iommu_bank; + u8 cntr = hwc->iommu_cntr; + u64 reg = 0ULL; + + reg = GET_CSOURCE(hwc); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, ®); + + reg = GET_DEVID_MASK(hwc); + reg = GET_DEVID(hwc) | (reg << 32); + if (reg) + reg |= BIT(31); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ®); + + reg = GET_PASID_MASK(hwc); + reg = GET_PASID(hwc) | (reg << 32); + if (reg) + reg |= BIT(31); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ®); + + reg = GET_DOMID_MASK(hwc); + reg = GET_DOMID(hwc) | (reg << 32); + if (reg) + reg |= BIT(31); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ®); +} + +static void perf_iommu_disable_event(struct perf_event *event) +{ + struct amd_iommu *iommu = perf_event_2_iommu(event); + struct hw_perf_event *hwc = &event->hw; + u64 reg = 0ULL; + + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, + IOMMU_PC_COUNTER_SRC_REG, ®); +} + +static void perf_iommu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + /* + * To account for power-gating, which prevents write to + * the counter, we need to enable the counter + * before setting up counter register. + */ + perf_iommu_enable_event(event); + + if (flags & PERF_EF_RELOAD) { + u64 count = 0; + struct amd_iommu *iommu = perf_event_2_iommu(event); + + /* + * Since the IOMMU PMU only support counting mode, + * the counter always start with value zero. + */ + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, + IOMMU_PC_COUNTER_REG, &count); + } + + perf_event_update_userpage(event); +} + +static void perf_iommu_read(struct perf_event *event) +{ + u64 count; + struct hw_perf_event *hwc = &event->hw; + struct amd_iommu *iommu = perf_event_2_iommu(event); + + if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, + IOMMU_PC_COUNTER_REG, &count)) + return; + + /* IOMMU pc counter register is only 48 bits */ + count &= GENMASK_ULL(47, 0); + + /* + * Since the counter always start with value zero, + * simply just accumulate the count for the event. + */ + local64_add(count, &event->count); +} + +static void perf_iommu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* + * To account for power-gating, in which reading the counter would + * return zero, we need to read the register before disabling. + */ + perf_iommu_read(event); + hwc->state |= PERF_HES_UPTODATE; + + perf_iommu_disable_event(event); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; +} + +static int perf_iommu_add(struct perf_event *event, int flags) +{ + int retval; + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* request an iommu bank/counter */ + retval = get_next_avail_iommu_bnk_cntr(event); + if (retval) + return retval; + + if (flags & PERF_EF_START) + perf_iommu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void perf_iommu_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_amd_iommu *perf_iommu = + container_of(event->pmu, struct perf_amd_iommu, pmu); + + perf_iommu_stop(event, PERF_EF_UPDATE); + + /* clear the assigned iommu bank/counter */ + clear_avail_iommu_bnk_cntr(perf_iommu, + hwc->iommu_bank, hwc->iommu_cntr); + + perf_event_update_userpage(event); +} + +static __init int _init_events_attrs(void) +{ + int i = 0, j; + struct attribute **attrs; + + while (amd_iommu_v2_event_descs[i].attr.attr.name) + i++; + + attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + for (j = 0; j < i; j++) + attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr; + + amd_iommu_events_group.attrs = attrs; + return 0; +} + +static const struct attribute_group *amd_iommu_attr_groups[] = { + &amd_iommu_format_group, + &amd_iommu_cpumask_group, + &amd_iommu_events_group, + NULL, +}; + +static const struct pmu iommu_pmu __initconst = { + .event_init = perf_iommu_event_init, + .add = perf_iommu_add, + .del = perf_iommu_del, + .start = perf_iommu_start, + .stop = perf_iommu_stop, + .read = perf_iommu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_iommu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, +}; + +static __init int init_one_iommu(unsigned int idx) +{ + struct perf_amd_iommu *perf_iommu; + int ret; + + perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL); + if (!perf_iommu) + return -ENOMEM; + + raw_spin_lock_init(&perf_iommu->lock); + + perf_iommu->pmu = iommu_pmu; + perf_iommu->iommu = get_amd_iommu(idx); + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx); + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx); + + if (!perf_iommu->iommu || + !perf_iommu->max_banks || + !perf_iommu->max_counters) { + kfree(perf_iommu); + return -EINVAL; + } + + snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx); + + ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1); + if (!ret) { + pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n", + idx, perf_iommu->max_banks, perf_iommu->max_counters); + list_add_tail(&perf_iommu->list, &perf_amd_iommu_list); + } else { + pr_warn("Error initializing IOMMU %d.\n", idx); + kfree(perf_iommu); + } + return ret; +} + +static __init int amd_iommu_pc_init(void) +{ + unsigned int i, cnt = 0; + int ret; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_supported()) + return -ENODEV; + + ret = _init_events_attrs(); + if (ret) + return ret; + + /* + * An IOMMU PMU is specific to an IOMMU, and can function independently. + * So we go through all IOMMUs and ignore the one that fails init + * unless all IOMMU are failing. + */ + for (i = 0; i < amd_iommu_get_num_iommus(); i++) { + ret = init_one_iommu(i); + if (!ret) + cnt++; + } + + if (!cnt) { + kfree(amd_iommu_events_group.attrs); + return -ENODEV; + } + + /* Init cpumask attributes to only core 0 */ + cpumask_set_cpu(0, &iommu_cpumask); + return 0; +} + +device_initcall(amd_iommu_pc_init); diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h new file mode 100644 index 000000000..e6310c635 --- /dev/null +++ b/arch/x86/events/amd/iommu.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Steven Kinney + * Author: Suravee Suthikulpanit + */ + +#ifndef _PERF_EVENT_AMD_IOMMU_H_ +#define _PERF_EVENT_AMD_IOMMU_H_ + +/* iommu pc mmio region register indexes */ +#define IOMMU_PC_COUNTER_REG 0x00 +#define IOMMU_PC_COUNTER_SRC_REG 0x08 +#define IOMMU_PC_PASID_MATCH_REG 0x10 +#define IOMMU_PC_DOMID_MATCH_REG 0x18 +#define IOMMU_PC_DEVID_MATCH_REG 0x20 +#define IOMMU_PC_COUNTER_REPORT_REG 0x28 + +/* maximum specified bank/counters */ +#define PC_MAX_SPEC_BNKS 64 +#define PC_MAX_SPEC_CNTRS 16 + +#endif /*_PERF_EVENT_AMD_IOMMU_H_*/ diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c new file mode 100644 index 000000000..38a75216c --- /dev/null +++ b/arch/x86/events/amd/lbr.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "../perf_event.h" + +/* LBR Branch Select valid bits */ +#define LBR_SELECT_MASK 0x1ff + +/* + * LBR Branch Select filter bits which when set, ensures that the + * corresponding type of branches are not recorded + */ +#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */ +#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */ +#define LBR_SELECT_JCC 2 /* Conditional branches */ +#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */ +#define LBR_SELECT_CALL_NEAR_IND 4 /* Indirect relative calls */ +#define LBR_SELECT_RET_NEAR 5 /* Near returns */ +#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */ +#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */ +#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */ + +#define LBR_KERNEL BIT(LBR_SELECT_KERNEL) +#define LBR_USER BIT(LBR_SELECT_USER) +#define LBR_JCC BIT(LBR_SELECT_JCC) +#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL) +#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND) +#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR) +#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL) +#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND) +#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH) +#define LBR_NOT_SUPP -1 /* unsupported filter */ +#define LBR_IGNORE 0 + +#define LBR_ANY \ + (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \ + LBR_REL_JMP | LBR_IND_JMP | LBR_FAR) + +struct branch_entry { + union { + struct { + u64 ip:58; + u64 ip_sign_ext:5; + u64 mispredict:1; + } split; + u64 full; + } from; + + union { + struct { + u64 ip:58; + u64 ip_sign_ext:3; + u64 reserved:1; + u64 spec:1; + u64 valid:1; + } split; + u64 full; + } to; +}; + +static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); +} + +static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); +} + +static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + + return val; +} + +static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + + return val; +} + +static __always_inline u64 sign_ext_branch_ip(u64 ip) +{ + u32 shift = 64 - boot_cpu_data.x86_virt_bits; + + return (u64)(((s64)ip << shift) >> shift); +} + +static void amd_pmu_lbr_filter(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int br_sel = cpuc->br_sel, offset, type, i, j; + bool compress = false; + bool fused_only = false; + u64 from, to; + + /* If sampling all branches, there is nothing to filter */ + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) + fused_only = true; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + type = branch_type_fused(from, to, 0, &offset); + + /* + * Adjust the branch from address in case of instruction + * fusion where it points to an instruction preceding the + * actual branch + */ + if (offset) { + cpuc->lbr_entries[i].from += offset; + if (fused_only) + continue; + } + + /* If type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; /* mark invalid */ + compress = true; + } + + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) + cpuc->lbr_entries[i].type = common_branch_type(type); + } + + if (!compress) + return; + + /* Remove all invalid entries */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +static const int lbr_spec_map[PERF_BR_SPEC_MAX] = { + PERF_BR_SPEC_NA, + PERF_BR_SPEC_WRONG_PATH, + PERF_BR_NON_SPEC_CORRECT_PATH, + PERF_BR_SPEC_CORRECT_PATH, +}; + +void amd_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_branch_entry *br = cpuc->lbr_entries; + struct branch_entry entry; + int out = 0, idx, i; + + if (!cpuc->lbr_users) + return; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + entry.from.full = amd_pmu_lbr_get_from(i); + entry.to.full = amd_pmu_lbr_get_to(i); + + /* + * Check if a branch has been logged; if valid = 0, spec = 0 + * then no branch was recorded + */ + if (!entry.to.split.valid && !entry.to.split.spec) + continue; + + perf_clear_branch_entry_bitfields(br + out); + + br[out].from = sign_ext_branch_ip(entry.from.split.ip); + br[out].to = sign_ext_branch_ip(entry.to.split.ip); + br[out].mispred = entry.from.split.mispredict; + br[out].predicted = !br[out].mispred; + + /* + * Set branch speculation information using the status of + * the valid and spec bits. + * + * When valid = 0, spec = 0, no branch was recorded and the + * entry is discarded as seen above. + * + * When valid = 0, spec = 1, the recorded branch was + * speculative but took the wrong path. + * + * When valid = 1, spec = 0, the recorded branch was + * non-speculative but took the correct path. + * + * When valid = 1, spec = 1, the recorded branch was + * speculative and took the correct path + */ + idx = (entry.to.split.valid << 1) | entry.to.split.spec; + br[out].spec = lbr_spec_map[idx]; + out++; + } + + cpuc->lbr_stack.nr = out; + + /* + * Internal register renaming always ensures that LBR From[0] and + * LBR To[0] always represent the TOS + */ + cpuc->lbr_stack.hw_idx = 0; + + /* Perform further software filtering */ + amd_pmu_lbr_filter(); +} + +static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE, + + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, + + [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP, + [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP, +}; + +static int amd_pmu_lbr_setup_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg = &event->hw.branch_reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, v; + int i; + + /* No LBR support */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* Ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + + if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) + mask |= X86_BR_IND_JMP; + + if (br_type & PERF_SAMPLE_BRANCH_CALL) + mask |= X86_BR_CALL | X86_BR_ZERO_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) + mask |= X86_BR_TYPE_SAVE; + + reg->reg = mask; + mask = 0; + + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + if (!(br_type & BIT_ULL(i))) + continue; + + v = lbr_select_map[i]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGNORE) + mask |= v; + } + + /* Filter bits operate in suppress mode */ + reg->config = mask ^ LBR_SELECT_MASK; + + return 0; +} + +int amd_pmu_lbr_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* LBR is not recommended in counting mode */ + if (!is_sampling_event(event)) + return -EINVAL; + + ret = amd_pmu_lbr_setup_filter(event); + if (!ret) + event->attach_state |= PERF_ATTACH_SCHED_CB; + + return ret; +} + +void amd_pmu_lbr_reset(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + if (!x86_pmu.lbr_nr) + return; + + /* Reset all branch records individually */ + for (i = 0; i < x86_pmu.lbr_nr; i++) { + amd_pmu_lbr_set_from(i, 0); + amd_pmu_lbr_set_to(i, 0); + } + + cpuc->last_task_ctx = NULL; + cpuc->last_log_id = 0; + wrmsrl(MSR_AMD64_LBR_SELECT, 0); +} + +void amd_pmu_lbr_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event_extra *reg = &event->hw.branch_reg; + + if (!x86_pmu.lbr_nr) + return; + + if (has_branch_stack(event)) { + cpuc->lbr_select = 1; + cpuc->lbr_sel->config = reg->config; + cpuc->br_sel = reg->reg; + } + + perf_sched_cb_inc(event->ctx->pmu); + + if (!cpuc->lbr_users++ && !event->total_time_running) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + if (has_branch_stack(event)) + cpuc->lbr_select = 0; + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + perf_sched_cb_dec(event->ctx->pmu); +} + +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * A context switch can flip the address space and LBR entries are + * not tagged with an identifier. Hence, branches cannot be resolved + * from the old address space and the LBR records should be wiped. + */ + if (cpuc->lbr_users && sched_in) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 lbr_select, dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + /* Set hardware branch filter */ + if (cpuc->lbr_select) { + lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK; + wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + } + + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); +} + +void amd_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); +} + +__init int amd_pmu_lbr_init(void) +{ + union cpuid_0x80000022_ebx ebx; + + if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2)) + return -EOPNOTSUPP; + + /* Set number of entries */ + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz; + + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + + return 0; +} diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c new file mode 100644 index 000000000..37d5b3805 --- /dev/null +++ b/arch/x86/events/amd/power.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Performance events - AMD Processor Power Reporting Mechanism + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Huang Rui + */ + +#include +#include +#include +#include +#include "../perf_event.h" + +/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */ +#define AMD_POWER_EVENT_MASK 0xFFULL + +/* + * Accumulated power status counters. + */ +#define AMD_POWER_EVENTSEL_PKG 1 + +/* + * The ratio of compute unit power accumulator sample period to the + * PTSC period. + */ +static unsigned int cpu_pwr_sample_ratio; + +/* Maximum accumulated power of a compute unit. */ +static u64 max_cu_acc_power; + +static struct pmu pmu_class; + +/* + * Accumulated power represents the sum of each compute unit's (CU) power + * consumption. On any core of each CU we read the total accumulated power from + * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents CPU bit map of all cores + * which are picked to measure the power for the CUs they belong to. + */ +static cpumask_t cpu_mask; + +static void event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc; + u64 delta, tdelta; + + prev_pwr_acc = hwc->pwr_acc; + prev_ptsc = hwc->ptsc; + rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); + rdmsrl(MSR_F15H_PTSC, new_ptsc); + + /* + * Calculate the CU power consumption over a time period, the unit of + * final value (delta) is micro-Watts. Then add it to the event count. + */ + if (new_pwr_acc < prev_pwr_acc) { + delta = max_cu_acc_power + new_pwr_acc; + delta -= prev_pwr_acc; + } else + delta = new_pwr_acc - prev_pwr_acc; + + delta *= cpu_pwr_sample_ratio * 1000; + tdelta = new_ptsc - prev_ptsc; + + do_div(delta, tdelta); + local64_add(delta, &event->count); +} + +static void __pmu_event_start(struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + rdmsrl(MSR_F15H_PTSC, event->hw.ptsc); + rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); +} + +static void pmu_event_start(struct perf_event *event, int mode) +{ + __pmu_event_start(event); +} + +static void pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Mark event as deactivated and stopped. */ + if (!(hwc->state & PERF_HES_STOPPED)) + hwc->state |= PERF_HES_STOPPED; + + /* Check if software counter update is necessary. */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of an event + * that we are disabling: + */ + event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int pmu_event_add(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __pmu_event_start(event); + + return 0; +} + +static void pmu_event_del(struct perf_event *event, int flags) +{ + pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK; + + /* Only look at AMD power events. */ + if (event->attr.type != pmu_class.type) + return -ENOENT; + + /* Unsupported modes and filters. */ + if (event->attr.sample_period) + return -EINVAL; + + if (cfg != AMD_POWER_EVENTSEL_PKG) + return -EINVAL; + + return 0; +} + +static void pmu_event_read(struct perf_event *event) +{ + event_update(event); +} + +static ssize_t +get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &cpu_mask); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL); + +static struct attribute *pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group pmu_attr_group = { + .attrs = pmu_attrs, +}; + +/* + * Currently it only supports to report the power of each + * processor/package. + */ +EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01"); + +EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts"); + +/* Convert the count from micro-Watts to milli-Watts. */ +EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3"); + +static struct attribute *events_attr[] = { + EVENT_PTR(power_pkg), + EVENT_PTR(power_pkg_unit), + EVENT_PTR(power_pkg_scale), + NULL, +}; + +static struct attribute_group pmu_events_group = { + .name = "events", + .attrs = events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group pmu_format_group = { + .name = "format", + .attrs = formats_attr, +}; + +static const struct attribute_group *attr_groups[] = { + &pmu_attr_group, + &pmu_format_group, + &pmu_events_group, + NULL, +}; + +static struct pmu pmu_class = { + .attr_groups = attr_groups, + /* system-wide only */ + .task_ctx_nr = perf_invalid_context, + .event_init = pmu_event_init, + .add = pmu_event_add, + .del = pmu_event_del, + .start = pmu_event_start, + .stop = pmu_event_stop, + .read = pmu_event_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, +}; + +static int power_cpu_exit(unsigned int cpu) +{ + int target; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask)) + return 0; + + /* + * Find a new CPU on the same compute unit, if was set in cpumask + * and still some CPUs on compute unit. Then migrate event and + * context to new CPU. + */ + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + if (target < nr_cpumask_bits) { + cpumask_set_cpu(target, &cpu_mask); + perf_pmu_migrate_context(&pmu_class, cpu, target); + } + return 0; +} + +static int power_cpu_init(unsigned int cpu) +{ + int target; + + /* + * 1) If any CPU is set at cpu_mask in the same compute unit, do + * nothing. + * 2) If no CPU is set at cpu_mask in the same compute unit, + * set current ONLINE CPU. + * + * Note: if there is a CPU aside of the new one already in the + * sibling mask, then it is also in cpu_mask. + */ + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + if (target >= nr_cpumask_bits) + cpumask_set_cpu(cpu, &cpu_mask); + return 0; +} + +static const struct x86_cpu_id cpu_match[] = { + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), + {}, +}; + +static int __init amd_power_pmu_init(void) +{ + int ret; + + if (!x86_match_cpu(cpu_match)) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) + return -ENODEV; + + cpu_pwr_sample_ratio = cpuid_ecx(0x80000007); + + if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { + pr_err("Failed to read max compute unit power accumulator MSR\n"); + return -ENODEV; + } + + + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, + "perf/x86/amd/power:online", + power_cpu_init, power_cpu_exit); + + ret = perf_pmu_register(&pmu_class, "power", -1); + if (WARN_ON(ret)) { + pr_warn("AMD Power PMU registration failed\n"); + return ret; + } + + pr_info("AMD Power PMU detected\n"); + return ret; +} +module_init(amd_power_pmu_init); + +static void __exit amd_power_pmu_exit(void) +{ + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE); + perf_pmu_unregister(&pmu_class); +} +module_exit(amd_power_pmu_exit); + +MODULE_AUTHOR("Huang Rui "); +MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c new file mode 100644 index 000000000..83f15fe41 --- /dev/null +++ b/arch/x86/events/amd/uncore.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Jacob Shin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_COUNTERS_NB 4 +#define NUM_COUNTERS_L2 4 +#define NUM_COUNTERS_L3 6 + +#define RDPMC_BASE_NB 6 +#define RDPMC_BASE_LLC 10 + +#define COUNTER_SHIFT 16 + +#undef pr_fmt +#define pr_fmt(fmt) "amd_uncore: " fmt + +static int pmu_version; +static int num_counters_llc; +static int num_counters_nb; +static bool l3_mask; + +static HLIST_HEAD(uncore_unused_list); + +struct amd_uncore { + int id; + int refcnt; + int cpu; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t *active_mask; + struct pmu *pmu; + struct perf_event **events; + struct hlist_node node; +}; + +static struct amd_uncore * __percpu *amd_uncore_nb; +static struct amd_uncore * __percpu *amd_uncore_llc; + +static struct pmu amd_nb_pmu; +static struct pmu amd_llc_pmu; + +static cpumask_t amd_nb_active_mask; +static cpumask_t amd_llc_active_mask; + +static bool is_nb_event(struct perf_event *event) +{ + return event->pmu->type == amd_nb_pmu.type; +} + +static bool is_llc_event(struct perf_event *event) +{ + return event->pmu->type == amd_llc_pmu.type; +} + +static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +{ + if (is_nb_event(event) && amd_uncore_nb) + return *per_cpu_ptr(amd_uncore_nb, event->cpu); + else if (is_llc_event(event) && amd_uncore_llc) + return *per_cpu_ptr(amd_uncore_llc, event->cpu); + + return NULL; +} + +static void amd_uncore_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + s64 delta; + + /* + * since we do not enable counter overflow interrupts, + * we do not have to worry about prev_count changing on us + */ + + prev = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); + delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); + delta >>= COUNTER_SHIFT; + local64_add(delta, &event->count); +} + +static void amd_uncore_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + + hwc->state = 0; + wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); + perf_event_update_userpage(event); +} + +static void amd_uncore_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); + hwc->state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + amd_uncore_read(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int amd_uncore_add(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + /* are we already assigned? */ + if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + goto out; + + for (i = 0; i < uncore->num_counters; i++) { + if (uncore->events[i] == event) { + hwc->idx = i; + goto out; + } + } + + /* if not, take the first available counter */ + hwc->idx = -1; + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + hwc->idx = i; + break; + } + } + +out: + if (hwc->idx == -1) + return -EBUSY; + + hwc->config_base = uncore->msr_base + (2 * hwc->idx); + hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* + * The first four DF counters are accessible via RDPMC index 6 to 9 + * followed by the L3 counters from index 10 to 15. For processors + * with more than four DF counters, the DF RDPMC assignments become + * discontiguous as the additional counters are accessible starting + * from index 16. + */ + if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB) + hwc->event_base_rdpmc += NUM_COUNTERS_L3; + + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void amd_uncore_del(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + amd_uncore_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], event, NULL) == event) + break; + } + + hwc->idx = -1; +} + +/* + * Return a full thread and slice mask unless user + * has provided them + */ +static u64 l3_thread_slice_mask(u64 config) +{ + if (boot_cpu_data.x86 <= 0x18) + return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | + ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); + + /* + * If the user doesn't specify a threadmask, they're not trying to + * count core 0, so we enable all cores & threads. + * We'll also assume that they want to count slice 0 if they specify + * a threadmask and leave sliceid and enallslices unpopulated. + */ + if (!(config & AMD64_L3_F19H_THREAD_MASK)) + return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_EN_ALL_CORES; + + return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | + AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_COREID_MASK); +} + +static int amd_uncore_event_init(struct perf_event *event) +{ + struct amd_uncore *uncore; + struct hw_perf_event *hwc = &event->hw; + u64 event_mask = AMD64_RAW_EVENT_MASK_NB; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (pmu_version >= 2 && is_nb_event(event)) + event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB; + + /* + * NB and Last level cache counters (MSRs) are shared across all cores + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: + */ + hwc->config = event->attr.config & event_mask; + hwc->idx = -1; + + if (event->cpu < 0) + return -EINVAL; + + /* + * SliceMask and ThreadMask need to be set for certain L3 events. + * For other events, the two fields do not affect the count. + */ + if (l3_mask && is_llc_event(event)) + hwc->config |= l3_thread_slice_mask(event->attr.config); + + uncore = event_to_amd_uncore(event); + if (!uncore) + return -ENODEV; + + /* + * since request can come in to any of the shared cores, we will remap + * to a single common cpu. + */ + event->cpu = uncore->cpu; + + return 0; +} + +static umode_t +amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ? + attr->mode : 0; +} + +static umode_t +amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0; +} + +static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + cpumask_t *active_mask; + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu->type == amd_nb_pmu.type) + active_mask = &amd_nb_active_mask; + else if (pmu->type == amd_llc_pmu.type) + active_mask = &amd_llc_active_mask; + else + return 0; + + return cpumap_print_to_pagebuf(true, buf, active_mask); +} +static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); + +static struct attribute *amd_uncore_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group amd_uncore_attr_group = { + .attrs = amd_uncore_attrs, +}; + +#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __uncore_##_var##_show, NULL) + +DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35"); +DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */ +DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */ +DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */ +DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */ +DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */ + +/* Common DF and NB attributes */ +static struct attribute *amd_uncore_df_format_attr[] = { + &format_attr_event12.attr, /* event */ + &format_attr_umask8.attr, /* umask */ + NULL, +}; + +/* Common L2 and L3 attributes */ +static struct attribute *amd_uncore_l3_format_attr[] = { + &format_attr_event12.attr, /* event */ + &format_attr_umask8.attr, /* umask */ + NULL, /* threadmask */ + NULL, +}; + +/* F17h unique L3 attributes */ +static struct attribute *amd_f17h_uncore_l3_format_attr[] = { + &format_attr_slicemask.attr, /* slicemask */ + NULL, +}; + +/* F19h unique L3 attributes */ +static struct attribute *amd_f19h_uncore_l3_format_attr[] = { + &format_attr_coreid.attr, /* coreid */ + &format_attr_enallslices.attr, /* enallslices */ + &format_attr_enallcores.attr, /* enallcores */ + &format_attr_sliceid.attr, /* sliceid */ + NULL, +}; + +static struct attribute_group amd_uncore_df_format_group = { + .name = "format", + .attrs = amd_uncore_df_format_attr, +}; + +static struct attribute_group amd_uncore_l3_format_group = { + .name = "format", + .attrs = amd_uncore_l3_format_attr, +}; + +static struct attribute_group amd_f17h_uncore_l3_format_group = { + .name = "format", + .attrs = amd_f17h_uncore_l3_format_attr, + .is_visible = amd_f17h_uncore_is_visible, +}; + +static struct attribute_group amd_f19h_uncore_l3_format_group = { + .name = "format", + .attrs = amd_f19h_uncore_l3_format_attr, + .is_visible = amd_f19h_uncore_is_visible, +}; + +static const struct attribute_group *amd_uncore_df_attr_groups[] = { + &amd_uncore_attr_group, + &amd_uncore_df_format_group, + NULL, +}; + +static const struct attribute_group *amd_uncore_l3_attr_groups[] = { + &amd_uncore_attr_group, + &amd_uncore_l3_format_group, + NULL, +}; + +static const struct attribute_group *amd_uncore_l3_attr_update[] = { + &amd_f17h_uncore_l3_format_group, + &amd_f19h_uncore_l3_format_group, + NULL, +}; + +static struct pmu amd_nb_pmu = { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_df_attr_groups, + .name = "amd_nb", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, +}; + +static struct pmu amd_llc_pmu = { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_l3_attr_groups, + .attr_update = amd_uncore_l3_attr_update, + .name = "amd_l2", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, +}; + +static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) +{ + return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, + cpu_to_node(cpu)); +} + +static inline struct perf_event ** +amd_uncore_events_alloc(unsigned int num, unsigned int cpu) +{ + return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL, + cpu_to_node(cpu)); +} + +static int amd_uncore_cpu_up_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL; + + if (amd_uncore_nb) { + *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; + uncore_nb = amd_uncore_alloc(cpu); + if (!uncore_nb) + goto fail; + uncore_nb->cpu = cpu; + uncore_nb->num_counters = num_counters_nb; + uncore_nb->rdpmc_base = RDPMC_BASE_NB; + uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; + uncore_nb->active_mask = &amd_nb_active_mask; + uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu); + if (!uncore_nb->events) + goto fail; + uncore_nb->id = -1; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; + } + + if (amd_uncore_llc) { + *per_cpu_ptr(amd_uncore_llc, cpu) = NULL; + uncore_llc = amd_uncore_alloc(cpu); + if (!uncore_llc) + goto fail; + uncore_llc->cpu = cpu; + uncore_llc->num_counters = num_counters_llc; + uncore_llc->rdpmc_base = RDPMC_BASE_LLC; + uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_llc->active_mask = &amd_llc_active_mask; + uncore_llc->pmu = &amd_llc_pmu; + uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu); + if (!uncore_llc->events) + goto fail; + uncore_llc->id = -1; + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; + } + + return 0; + +fail: + if (uncore_nb) { + kfree(uncore_nb->events); + kfree(uncore_nb); + } + + if (uncore_llc) { + kfree(uncore_llc->events); + kfree(uncore_llc); + } + + return -ENOMEM; +} + +static struct amd_uncore * +amd_uncore_find_online_sibling(struct amd_uncore *this, + struct amd_uncore * __percpu *uncores) +{ + unsigned int cpu; + struct amd_uncore *that; + + for_each_online_cpu(cpu) { + that = *per_cpu_ptr(uncores, cpu); + + if (!that) + continue; + + if (this == that) + continue; + + if (this->id == that->id) { + hlist_add_head(&this->node, &uncore_unused_list); + this = that; + break; + } + } + + this->refcnt++; + return this; +} + +static int amd_uncore_cpu_starting(unsigned int cpu) +{ + unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + + if (amd_uncore_nb) { + uncore = *per_cpu_ptr(amd_uncore_nb, cpu); + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + uncore->id = ecx & 0xff; + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } + + if (amd_uncore_llc) { + uncore = *per_cpu_ptr(amd_uncore_llc, cpu); + uncore->id = get_llc_id(cpu); + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; + } + + return 0; +} + +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); + kfree(uncore->events); + kfree(uncore); + } +} + +static void uncore_online(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + + uncore_clean_online(); + + if (cpu == uncore->cpu) + cpumask_set_cpu(cpu, uncore->active_mask); +} + +static int amd_uncore_cpu_online(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_online(cpu, amd_uncore_nb); + + if (amd_uncore_llc) + uncore_online(cpu, amd_uncore_llc); + + return 0; +} + +static void uncore_down_prepare(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + unsigned int i; + struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); + + if (this->cpu != cpu) + return; + + /* this cpu is going down, migrate to a shared sibling if possible */ + for_each_online_cpu(i) { + struct amd_uncore *that = *per_cpu_ptr(uncores, i); + + if (cpu == i) + continue; + + if (this == that) { + perf_pmu_migrate_context(this->pmu, cpu, i); + cpumask_clear_cpu(cpu, that->active_mask); + cpumask_set_cpu(i, that->active_mask); + that->cpu = i; + break; + } + } +} + +static int amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_down_prepare(cpu, amd_uncore_nb); + + if (amd_uncore_llc) + uncore_down_prepare(cpu, amd_uncore_llc); + + return 0; +} + +static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) +{ + struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + + if (cpu == uncore->cpu) + cpumask_clear_cpu(cpu, uncore->active_mask); + + if (!--uncore->refcnt) { + kfree(uncore->events); + kfree(uncore); + } + + *per_cpu_ptr(uncores, cpu) = NULL; +} + +static int amd_uncore_cpu_dead(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_dead(cpu, amd_uncore_nb); + + if (amd_uncore_llc) + uncore_dead(cpu, amd_uncore_llc); + + return 0; +} + +static int __init amd_uncore_init(void) +{ + struct attribute **df_attr = amd_uncore_df_format_attr; + struct attribute **l3_attr = amd_uncore_l3_format_attr; + union cpuid_0x80000022_ebx ebx; + int ret = -ENODEV; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return -ENODEV; + + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) + pmu_version = 2; + + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + if (boot_cpu_data.x86 >= 0x17) { + /* + * For F17h and above, the Northbridge counters are + * repurposed as Data Fabric counters. Also, L3 + * counters are supported too. The PMUs are exported + * based on family as either L2 or L3 and NB or DF. + */ + num_counters_llc = NUM_COUNTERS_L3; + amd_nb_pmu.name = "amd_df"; + amd_llc_pmu.name = "amd_l3"; + l3_mask = true; + } + + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { + if (pmu_version >= 2) { + *df_attr++ = &format_attr_event14v2.attr; + *df_attr++ = &format_attr_umask12.attr; + } else if (boot_cpu_data.x86 >= 0x17) { + *df_attr = &format_attr_event14.attr; + } + + amd_uncore_nb = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_nb) { + ret = -ENOMEM; + goto fail_nb; + } + ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (ret) + goto fail_nb; + + if (pmu_version >= 2) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + num_counters_nb = ebx.split.num_df_pmc; + } + + pr_info("%d %s %s counters detected\n", num_counters_nb, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + amd_nb_pmu.name); + + ret = 0; + } + + if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { + if (boot_cpu_data.x86 >= 0x19) { + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + *l3_attr++ = &format_attr_threadmask2.attr; + } else if (boot_cpu_data.x86 >= 0x17) { + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + *l3_attr++ = &format_attr_threadmask8.attr; + } + + amd_uncore_llc = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_llc) { + ret = -ENOMEM; + goto fail_llc; + } + ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); + if (ret) + goto fail_llc; + + pr_info("%d %s %s counters detected\n", num_counters_llc, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + amd_llc_pmu.name); + ret = 0; + } + + /* + * Install callbacks. Core will call them for each online cpu. + */ + if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, + "perf/x86/amd/uncore:prepare", + amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + goto fail_llc; + + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, + "perf/x86/amd/uncore:starting", + amd_uncore_cpu_starting, NULL)) + goto fail_prep; + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, + "perf/x86/amd/uncore:online", + amd_uncore_cpu_online, + amd_uncore_cpu_down_prepare)) + goto fail_start; + return 0; + +fail_start: + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); +fail_prep: + cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); +fail_llc: + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) + perf_pmu_unregister(&amd_nb_pmu); + free_percpu(amd_uncore_llc); +fail_nb: + free_percpu(amd_uncore_nb); + + return ret; +} + +static void __exit amd_uncore_exit(void) +{ + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); + cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); + + if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { + perf_pmu_unregister(&amd_llc_pmu); + free_percpu(amd_uncore_llc); + amd_uncore_llc = NULL; + } + + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { + perf_pmu_unregister(&amd_nb_pmu); + free_percpu(amd_uncore_nb); + amd_uncore_nb = NULL; + } +} + +module_init(amd_uncore_init); +module_exit(amd_uncore_exit); + +MODULE_DESCRIPTION("AMD Uncore Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c new file mode 100644 index 000000000..30fb4931d --- /dev/null +++ b/arch/x86/events/core.c @@ -0,0 +1,3029 @@ +/* + * Performance events x86 architecture code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * Copyright (C) 2009 Google, Inc., Stephane Eranian + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "perf_event.h" + +struct x86_pmu x86_pmu __read_mostly; +static struct pmu pmu; + +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .enabled = 1, + .pmu = &pmu, +}; + +DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); +DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); +DEFINE_STATIC_KEY_FALSE(perf_is_hybrid); + +/* + * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined + * from just a typename, as opposed to an actual function. + */ +DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable); + +DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign); + +DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); +DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); +DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); + +DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period); +DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update); +DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period); + +DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); +DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); +DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); + +DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); + +DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); +DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); + +DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); + +/* + * This one is magic, it will get called even when PMU init fails (because + * there is no PMU), in which case it should simply return NULL. + */ +DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); + +u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +/* + * Propagate event elapsed time into the generic event. + * Can only be executed on the CPU where the event is active. + * Returns the delta events processed. + */ +u64 x86_perf_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - x86_pmu.cntval_bits; + u64 prev_raw_count, new_raw_count; + u64 delta; + + if (unlikely(!hwc->event_base)) + return 0; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ +again: + prev_raw_count = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +/* + * Find and validate any extra registers to set up. + */ +static int x86_pmu_extra_regs(u64 config, struct perf_event *event) +{ + struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs); + struct hw_perf_event_extra *reg; + struct extra_reg *er; + + reg = &event->hw.extra_reg; + + if (!extra_regs) + return 0; + + for (er = extra_regs; er->msr; er++) { + if (er->event != (config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; + + reg->idx = er->idx; + reg->config = event->attr.config1; + reg->reg = er->msr; + break; + } + return 0; +} + +static atomic_t active_events; +static atomic_t pmc_refcount; +static DEFINE_MUTEX(pmc_reserve_mutex); + +#ifdef CONFIG_X86_LOCAL_APIC + +static inline int get_possible_num_counters(void) +{ + int i, num_counters = x86_pmu.num_counters; + + if (!is_hybrid()) + return num_counters; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) + num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters); + + return num_counters; +} + +static bool reserve_pmc_hardware(void) +{ + int i, num_counters = get_possible_num_counters(); + + for (i = 0; i < num_counters; i++) { + if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) + goto perfctr_fail; + } + + for (i = 0; i < num_counters; i++) { + if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) + goto eventsel_fail; + } + + return true; + +eventsel_fail: + for (i--; i >= 0; i--) + release_evntsel_nmi(x86_pmu_config_addr(i)); + + i = num_counters; + +perfctr_fail: + for (i--; i >= 0; i--) + release_perfctr_nmi(x86_pmu_event_addr(i)); + + return false; +} + +static void release_pmc_hardware(void) +{ + int i, num_counters = get_possible_num_counters(); + + for (i = 0; i < num_counters; i++) { + release_perfctr_nmi(x86_pmu_event_addr(i)); + release_evntsel_nmi(x86_pmu_config_addr(i)); + } +} + +#else + +static bool reserve_pmc_hardware(void) { return true; } +static void release_pmc_hardware(void) {} + +#endif + +bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) +{ + u64 val, val_fail = -1, val_new= ~0; + int i, reg, reg_fail = -1, ret = 0; + int bios_fail = 0; + int reg_safe = -1; + + /* + * Check to see if the BIOS enabled any of the counters, if so + * complain and bail. + */ + for (i = 0; i < num_counters; i++) { + reg = x86_pmu_config_addr(i); + ret = rdmsrl_safe(reg, &val); + if (ret) + goto msr_fail; + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } else { + reg_safe = i; + } + } + + if (num_counters_fixed) { + reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + ret = rdmsrl_safe(reg, &val); + if (ret) + goto msr_fail; + for (i = 0; i < num_counters_fixed; i++) { + if (fixed_counter_disabled(i, pmu)) + continue; + if (val & (0x03ULL << i*4)) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } + } + } + + /* + * If all the counters are enabled, the below test will always + * fail. The tools will also become useless in this scenario. + * Just fail and disable the hardware counters. + */ + + if (reg_safe == -1) { + reg = reg_safe; + goto msr_fail; + } + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + reg = x86_pmu_event_addr(reg_safe); + if (rdmsrl_safe(reg, &val)) + goto msr_fail; + val ^= 0xffffUL; + ret = wrmsrl_safe(reg, val); + ret |= rdmsrl_safe(reg, &val_new); + if (ret || val != val_new) + goto msr_fail; + + /* + * We still allow the PMU driver to operate: + */ + if (bios_fail) { + pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); + pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", + reg_fail, val_fail); + } + + return true; + +msr_fail: + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + pr_cont("PMU not available due to virtualization, using software events only.\n"); + } else { + pr_cont("Broken PMU hardware detected, using software events only.\n"); + pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n", + reg, val_new); + } + + return false; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + x86_release_hardware(); + atomic_dec(&active_events); +} + +void hw_perf_lbr_event_destroy(struct perf_event *event) +{ + hw_perf_event_destroy(event); + + /* undo the lbr/bts event accounting */ + x86_del_exclusive(x86_lbr_exclusive_lbr); +} + +static inline int x86_pmu_initialized(void) +{ + return x86_pmu.handle_irq != NULL; +} + +static inline int +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + unsigned int cache_type, cache_op, cache_result; + u64 config, val; + + config = attr->config; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); + + val = hybrid_var(event->pmu, hw_cache_event_ids)[cache_type][cache_op][cache_result]; + if (val == 0) + return -ENOENT; + + if (val == -1) + return -EINVAL; + + hwc->config |= val; + attr->config1 = hybrid_var(event->pmu, hw_cache_extra_regs)[cache_type][cache_op][cache_result]; + return x86_pmu_extra_regs(val, event); +} + +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&pmc_refcount)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&pmc_refcount) == 0) { + if (!reserve_pmc_hardware()) { + err = -EBUSY; + } else { + reserve_ds_buffers(); + reserve_lbr_buffers(); + } + } + if (!err) + atomic_inc(&pmc_refcount); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + release_lbr_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Check if we can create event of a certain type (that no conflicting events + * are present). + */ +int x86_add_exclusive(unsigned int what) +{ + int i; + + /* + * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. + * LBR and BTS are still mutually exclusive. + */ + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) + goto out; + + if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { + mutex_lock(&pmc_reserve_mutex); + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { + if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) + goto fail_unlock; + } + atomic_inc(&x86_pmu.lbr_exclusive[what]); + mutex_unlock(&pmc_reserve_mutex); + } + +out: + atomic_inc(&active_events); + return 0; + +fail_unlock: + mutex_unlock(&pmc_reserve_mutex); + return -EBUSY; +} + +void x86_del_exclusive(unsigned int what) +{ + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) + return; + + atomic_dec(&x86_pmu.lbr_exclusive[what]); +} + +int x86_setup_perfctr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + + if (!is_sampling_event(event)) { + hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + if (attr->type == event->pmu->type) + return x86_pmu_extra_regs(event->attr.config, event); + + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, event); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + + /* + * The generic map: + */ + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + hwc->config |= config; + + return 0; +} + +/* + * check that branch_sample_type is compatible with + * settings needed for precise_ip > 1 which implies + * using the LBR to capture ALL taken branches at the + * priv levels of the measurement + */ +static inline int precise_br_compat(struct perf_event *event) +{ + u64 m = event->attr.branch_sample_type; + u64 b = 0; + + /* must capture all branches */ + if (!(m & PERF_SAMPLE_BRANCH_ANY)) + return 0; + + m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_user) + b |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + b |= PERF_SAMPLE_BRANCH_KERNEL; + + /* + * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 + */ + + return m == b; +} + +int x86_pmu_max_precise(void) +{ + int precise = 0; + + /* Support for constant skid */ + if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { + precise++; + + /* Support for IP fixup */ + if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) + precise++; + + if (x86_pmu.pebs_prec_dist) + precise++; + } + return precise; +} + +int x86_pmu_hw_config(struct perf_event *event) +{ + if (event->attr.precise_ip) { + int precise = x86_pmu_max_precise(); + + if (event->attr.precise_ip > precise) + return -EOPNOTSUPP; + + /* There's no sense in having PEBS for non sampling events: */ + if (!is_sampling_event(event)) + return -EINVAL; + } + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the + * event. + */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; + } + } + + if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) + event->attach_state |= PERF_ATTACH_TASK_DATA; + + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + event->hw.config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to + */ + if (!event->attr.exclude_user) + event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; + if (!event->attr.exclude_kernel) + event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; + + if (event->attr.type == event->pmu->type) + event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; + + if (event->attr.sample_period && x86_pmu.limit_period) { + s64 left = event->attr.sample_period; + x86_pmu.limit_period(event, &left); + if (left > event->attr.sample_period) + return -EINVAL; + } + + /* sample_regs_user never support XMM registers */ + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) + return -EINVAL; + /* + * Besides the general purpose registers, XMM registers may + * be collected in PEBS on some platforms, e.g. Icelake + */ + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) + return -EINVAL; + + if (!event->attr.precise_ip) + return -EINVAL; + } + + return x86_setup_perfctr(event); +} + +/* + * Setup the hardware configuration for a given attr_type + */ +static int __x86_pmu_event_init(struct perf_event *event) +{ + int err; + + if (!x86_pmu_initialized()) + return -ENODEV; + + err = x86_reserve_hardware(); + if (err) + return err; + + atomic_inc(&active_events); + event->destroy = hw_perf_event_destroy; + + event->hw.idx = -1; + event->hw.last_cpu = -1; + event->hw.last_tag = ~0ULL; + + /* mark unused */ + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + + return x86_pmu.hw_config(event); +} + +void x86_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + rdmsrl(x86_pmu_config_addr(idx), val); + if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) + continue; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(x86_pmu_config_addr(idx), val); + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(idx + 1), 0); + } +} + +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data) +{ + return static_call(x86_pmu_guest_get_msrs)(nr, data); +} +EXPORT_SYMBOL_GPL(perf_guest_get_msrs); + +/* + * There may be PMI landing after enabled=0. The PMI hitting could be before or + * after disable_all. + * + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. + * It will not be re-enabled in the NMI handler again, because enabled=0. After + * handling the NMI, disable_all will be called, which will not change the + * state either. If PMI hits after disable_all, the PMU is already disabled + * before entering NMI handler. The NMI handler will not change the state + * either. + * + * So either situation is harmless. + */ +static void x86_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu_initialized()) + return; + + if (!cpuc->enabled) + return; + + cpuc->n_added = 0; + cpuc->enabled = 0; + barrier(); + + static_call(x86_pmu_disable_all)(); +} + +void x86_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + } +} + +static inline int is_x86_event(struct perf_event *event) +{ + int i; + + if (!is_hybrid()) + return event->pmu == &pmu; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu) + return true; + } + + return false; +} + +struct pmu *x86_get_pmu(unsigned int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + /* + * All CPUs of the hybrid type have been offline. + * The x86_get_pmu() should not be invoked. + */ + if (WARN_ON_ONCE(!cpuc->pmu)) + return &pmu; + + return cpuc->pmu; +} +/* + * Event scheduler state: + * + * Assign events iterating over all events and counters, beginning + * with events with least weights first. Keep the current iterator + * state in struct sched_state. + */ +struct sched_state { + int weight; + int event; /* event index */ + int counter; /* counter index */ + int unassigned; /* number of events to be assigned left */ + int nr_gp; /* number of GP counters used */ + u64 used; +}; + +/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ +#define SCHED_STATES_MAX 2 + +struct perf_sched { + int max_weight; + int max_events; + int max_gp; + int saved_states; + struct event_constraint **constraints; + struct sched_state state; + struct sched_state saved[SCHED_STATES_MAX]; +}; + +/* + * Initialize iterator that runs through all events and counters. + */ +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, + int num, int wmin, int wmax, int gpmax) +{ + int idx; + + memset(sched, 0, sizeof(*sched)); + sched->max_events = num; + sched->max_weight = wmax; + sched->max_gp = gpmax; + sched->constraints = constraints; + + for (idx = 0; idx < num; idx++) { + if (constraints[idx]->weight == wmin) + break; + } + + sched->state.event = idx; /* start with min weight */ + sched->state.weight = wmin; + sched->state.unassigned = num; +} + +static void perf_sched_save_state(struct perf_sched *sched) +{ + if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) + return; + + sched->saved[sched->saved_states] = sched->state; + sched->saved_states++; +} + +static bool perf_sched_restore_state(struct perf_sched *sched) +{ + if (!sched->saved_states) + return false; + + sched->saved_states--; + sched->state = sched->saved[sched->saved_states]; + + /* this assignment didn't work out */ + /* XXX broken vs EVENT_PAIR */ + sched->state.used &= ~BIT_ULL(sched->state.counter); + + /* try the next one */ + sched->state.counter++; + + return true; +} + +/* + * Select a counter for the current event to schedule. Return true on + * success. + */ +static bool __perf_sched_find_counter(struct perf_sched *sched) +{ + struct event_constraint *c; + int idx; + + if (!sched->state.unassigned) + return false; + + if (sched->state.event >= sched->max_events) + return false; + + c = sched->constraints[sched->state.event]; + /* Prefer fixed purpose counters */ + if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { + idx = INTEL_PMC_IDX_FIXED; + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { + u64 mask = BIT_ULL(idx); + + if (sched->state.used & mask) + continue; + + sched->state.used |= mask; + goto done; + } + } + + /* Grab the first unused counter starting with idx */ + idx = sched->state.counter; + for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { + u64 mask = BIT_ULL(idx); + + if (c->flags & PERF_X86_EVENT_PAIR) + mask |= mask << 1; + + if (sched->state.used & mask) + continue; + + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + + sched->state.used |= mask; + goto done; + } + + return false; + +done: + sched->state.counter = idx; + + if (c->overlap) + perf_sched_save_state(sched); + + return true; +} + +static bool perf_sched_find_counter(struct perf_sched *sched) +{ + while (!__perf_sched_find_counter(sched)) { + if (!perf_sched_restore_state(sched)) + return false; + } + + return true; +} + +/* + * Go through all unassigned events and find the next one to schedule. + * Take events with the least weight first. Return true on success. + */ +static bool perf_sched_next_event(struct perf_sched *sched) +{ + struct event_constraint *c; + + if (!sched->state.unassigned || !--sched->state.unassigned) + return false; + + do { + /* next event */ + sched->state.event++; + if (sched->state.event >= sched->max_events) { + /* next weight */ + sched->state.event = 0; + sched->state.weight++; + if (sched->state.weight > sched->max_weight) + return false; + } + c = sched->constraints[sched->state.event]; + } while (c->weight != sched->state.weight); + + sched->state.counter = 0; /* start with first counter */ + + return true; +} + +/* + * Assign a counter for each event. + */ +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign) +{ + struct perf_sched sched; + + perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax); + + do { + if (!perf_sched_find_counter(&sched)) + break; /* failed */ + if (assign) + assign[sched.state.event] = sched.state.counter; + } while (perf_sched_next_event(&sched)); + + return sched.state.unassigned; +} +EXPORT_SYMBOL_GPL(perf_assign_events); + +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + int num_counters = hybrid(cpuc->pmu, num_counters); + struct event_constraint *c; + struct perf_event *e; + int n0, i, wmin, wmax, unsched = 0; + struct hw_perf_event *hwc; + u64 used_mask = 0; + + /* + * Compute the number of events already present; see x86_pmu_add(), + * validate_group() and x86_pmu_commit_txn(). For the former two + * cpuc->n_events hasn't been updated yet, while for the latter + * cpuc->n_txn contains the number of events added in the current + * transaction. + */ + n0 = cpuc->n_events; + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + n0 -= cpuc->n_txn; + + static_call_cond(x86_pmu_start_scheduling)(cpuc); + + for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { + c = cpuc->event_constraint[i]; + + /* + * Previously scheduled events should have a cached constraint, + * while new events should not have one. + */ + WARN_ON_ONCE((c && i >= n0) || (!c && i < n0)); + + /* + * Request constraints for new events; or for those events that + * have a dynamic constraint -- for those the constraint can + * change due to external factors (sibling state, allow_tfa). + */ + if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { + c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]); + cpuc->event_constraint[i] = c; + } + + wmin = min(wmin, c->weight); + wmax = max(wmax, c->weight); + } + + /* + * fastpath, try to reuse previous register + */ + for (i = 0; i < n; i++) { + u64 mask; + + hwc = &cpuc->event_list[i]->hw; + c = cpuc->event_constraint[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c->idxmsk)) + break; + + mask = BIT_ULL(hwc->idx); + if (is_counter_pair(hwc)) + mask |= mask << 1; + + /* not already used */ + if (used_mask & mask) + break; + + used_mask |= mask; + + if (assign) + assign[i] = hwc->idx; + } + + /* slow path */ + if (i != n) { + int gpmax = num_counters; + + /* + * Do not allow scheduling of more than half the available + * generic counters. + * + * This helps avoid counter starvation of sibling thread by + * ensuring at most half the counters cannot be in exclusive + * mode. There is no designated counters for the limits. Any + * N/2 counters can be used. This helps with events with + * specific counter constraints. + */ + if (is_ht_workaround_enabled() && !cpuc->is_fake && + READ_ONCE(cpuc->excl_cntrs->exclusive_present)) + gpmax /= 2; + + /* + * Reduce the amount of available counters to allow fitting + * the extra Merge events needed by large increment events. + */ + if (x86_pmu.flags & PMU_FL_PAIR) { + gpmax = num_counters - cpuc->n_pair; + WARN_ON(gpmax <= 0); + } + + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, + wmax, gpmax, assign); + } + + /* + * In case of success (unsched = 0), mark events as committed, + * so we do not put_constraint() in case new events are added + * and fail to be scheduled + * + * We invoke the lower level commit callback to lock the resource + * + * We do not need to do all of this in case we are called to + * validate an event group (assign == NULL) + */ + if (!unsched && assign) { + for (i = 0; i < n; i++) + static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); + } else { + for (i = n0; i < n; i++) { + e = cpuc->event_list[i]; + + /* + * release events that failed scheduling + */ + static_call_cond(x86_pmu_put_event_constraints)(cpuc, e); + + cpuc->event_constraint[i] = NULL; + } + } + + static_call_cond(x86_pmu_stop_scheduling)(cpuc); + + return unsched ? -EINVAL : 0; +} + +static int add_nr_metric_event(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (is_metric_event(event)) { + if (cpuc->n_metric == INTEL_TD_METRIC_NUM) + return -EINVAL; + cpuc->n_metric++; + cpuc->n_txn_metric++; + } + + return 0; +} + +static void del_nr_metric_event(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (is_metric_event(event)) + cpuc->n_metric--; +} + +static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event, + int max_count, int n) +{ + union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap); + + if (intel_cap.perf_metrics && add_nr_metric_event(cpuc, event)) + return -EINVAL; + + if (n >= max_count + cpuc->n_metric) + return -EINVAL; + + cpuc->event_list[n] = event; + if (is_counter_pair(&event->hw)) { + cpuc->n_pair++; + cpuc->n_txn_pair++; + } + + return 0; +} + +/* + * dogrp: true if must collect siblings events (group) + * returns total number of events and error code + */ +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) +{ + int num_counters = hybrid(cpuc->pmu, num_counters); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + struct perf_event *event; + int n, max_count; + + max_count = num_counters + num_counters_fixed; + + /* current number of events already accepted */ + n = cpuc->n_events; + if (!cpuc->n_events) + cpuc->pebs_output = 0; + + if (!cpuc->is_fake && leader->attr.precise_ip) { + /* + * For PEBS->PT, if !aux_event, the group leader (PT) went + * away, the group was broken down and this singleton event + * can't schedule any more. + */ + if (is_pebs_pt(leader) && !leader->aux_event) + return -EINVAL; + + /* + * pebs_output: 0: no PEBS so far, 1: PT, 2: DS + */ + if (cpuc->pebs_output && + cpuc->pebs_output != is_pebs_pt(leader) + 1) + return -EINVAL; + + cpuc->pebs_output = is_pebs_pt(leader) + 1; + } + + if (is_x86_event(leader)) { + if (collect_event(cpuc, leader, max_count, n)) + return -EINVAL; + n++; + } + + if (!dogrp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (collect_event(cpuc, event, max_count, n)) + return -EINVAL; + + n++; + } + return n; +} + +static inline void x86_assign_hw_event(struct perf_event *event, + struct cpu_hw_events *cpuc, int i) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = hwc->idx = cpuc->assign[i]; + hwc->last_cpu = smp_processor_id(); + hwc->last_tag = ++cpuc->tags[i]; + + static_call_cond(x86_pmu_assign)(event, idx); + + switch (hwc->idx) { + case INTEL_PMC_IDX_FIXED_BTS: + case INTEL_PMC_IDX_FIXED_VLBR: + hwc->config_base = 0; + hwc->event_base = 0; + break; + + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: + /* All the metric events are mapped onto the fixed counter 3. */ + idx = INTEL_PMC_IDX_FIXED_SLOTS; + fallthrough; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1: + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + + (idx - INTEL_PMC_IDX_FIXED); + hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | + INTEL_PMC_FIXED_RDPMC_BASE; + break; + + default: + hwc->config_base = x86_pmu_config_addr(hwc->idx); + hwc->event_base = x86_pmu_event_addr(hwc->idx); + hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); + break; + } +} + +/** + * x86_perf_rdpmc_index - Return PMC counter used for event + * @event: the perf_event to which the PMC counter was assigned + * + * The counter assigned to this performance event may change if interrupts + * are enabled. This counter should thus never be used while interrupts are + * enabled. Before this function is used to obtain the assigned counter the + * event should be checked for validity using, for example, + * perf_event_read_local(), within the same interrupt disabled section in + * which this counter is planned to be used. + * + * Return: The index of the performance monitoring counter assigned to + * @perf_event. + */ +int x86_perf_rdpmc_index(struct perf_event *event) +{ + lockdep_assert_irqs_disabled(); + + return event->hw.event_base_rdpmc; +} + +static inline int match_prev_assignment(struct hw_perf_event *hwc, + struct cpu_hw_events *cpuc, + int i) +{ + return hwc->idx == cpuc->assign[i] && + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i]; +} + +static void x86_pmu_start(struct perf_event *event, int flags); + +static void x86_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + struct hw_perf_event *hwc; + int i, added = cpuc->n_added; + + if (!x86_pmu_initialized()) + return; + + if (cpuc->enabled) + return; + + if (cpuc->n_added) { + int n_running = cpuc->n_events - cpuc->n_added; + /* + * apply assignment obtained either from + * hw_perf_group_sched_in() or x86_pmu_enable() + * + * step1: save events moving to new counters + */ + for (i = 0; i < n_running; i++) { + event = cpuc->event_list[i]; + hwc = &event->hw; + + /* + * we can avoid reprogramming counter if: + * - assigned same counter as last time + * - running on same CPU as last time + * - no other event has used the counter since + */ + if (hwc->idx == -1 || + match_prev_assignment(hwc, cpuc, i)) + continue; + + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); + } + + /* + * step2: reprogram moved events into new counters + */ + for (i = 0; i < cpuc->n_events; i++) { + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (!match_prev_assignment(hwc, cpuc, i)) + x86_assign_hw_event(event, cpuc, i); + else if (i < n_running) + continue; + + if (hwc->state & PERF_HES_ARCH) + continue; + + /* + * if cpuc->enabled = 0, then no wrmsr as + * per x86_pmu_enable_event() + */ + x86_pmu_start(event, PERF_EF_RELOAD); + } + cpuc->n_added = 0; + perf_events_lapic_init(); + } + + cpuc->enabled = 1; + barrier(); + + static_call(x86_pmu_enable_all)(added); +} + +DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); + +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the event disabled in hw: + */ +int x86_perf_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0, idx = hwc->idx; + + if (unlikely(!hwc->event_base)) + return 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + /* + * Quirk: certain CPUs dont like it if just 1 hw_event is left: + */ + if (unlikely(left < 2)) + left = 2; + + if (left > x86_pmu.max_period) + left = x86_pmu.max_period; + + static_call_cond(x86_pmu_limit_period)(event, &left); + + this_cpu_write(pmc_prev_left[idx], left); + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); + + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + + /* + * Sign extend the Merge event counter's upper 16 bits since + * we currently declare a 48-bit counter width + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); + + perf_event_update_userpage(event); + + return ret; +} + +void x86_pmu_enable_event(struct perf_event *event) +{ + if (__this_cpu_read(cpu_hw_events.enabled)) + __x86_pmu_enable_event(&event->hw, + ARCH_PERFMON_EVENTSEL_ENABLE); +} + +/* + * Add a single event to the PMU. + * + * The event is added to the group of enabled events + * but only if it can be scheduled with existing events. + */ +static int x86_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc; + int assign[X86_PMC_IDX_MAX]; + int n, n0, ret; + + hwc = &event->hw; + + n0 = cpuc->n_events; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + /* + * If group events scheduling transaction was started, + * skip the schedulability test here, it will be performed + * at commit time (->commit_txn) as a whole. + * + * If commit fails, we'll call ->del() on all events + * for which ->add() was called. + */ + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + goto done_collect; + + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); + if (ret) + goto out; + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); + +done_collect: + /* + * Commit the collect_events() state. See x86_pmu_del() and + * x86_pmu_*_txn(). + */ + cpuc->n_events = n; + cpuc->n_added += n - n0; + cpuc->n_txn += n - n0; + + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + static_call_cond(x86_pmu_add)(event); + + ret = 0; +out: + return ret; +} + +static void x86_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx = event->hw.idx; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + static_call(x86_pmu_set_period)(event); + } + + event->hw.state = 0; + + cpuc->events[idx] = event; + __set_bit(idx, cpuc->active_mask); + static_call(x86_pmu_enable)(event); + perf_event_update_userpage(event); +} + +void perf_event_print_debug(void) +{ + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + u64 pebs, debugctl; + int cpu = smp_processor_id(); + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int num_counters = hybrid(cpuc->pmu, num_counters); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); + unsigned long flags; + int idx; + + if (!num_counters) + return; + + local_irq_save(flags); + + if (x86_pmu.version >= 2) { + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + + pr_info("\n"); + pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); + pr_info("CPU#%d: status: %016llx\n", cpu, status); + pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); + pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + if (pebs_constraints) { + rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); + pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); + } + if (x86_pmu.lbr_nr) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); + } + } + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); + + for (idx = 0; idx < num_counters; idx++) { + rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrl(x86_pmu_event_addr(idx), pmc_count); + + prev_left = per_cpu(pmc_prev_left[idx], cpu); + + pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", + cpu, idx, pmc_ctrl); + pr_info("CPU#%d: gen-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + pr_info("CPU#%d: gen-PMC%d left: %016llx\n", + cpu, idx, prev_left); + } + for (idx = 0; idx < num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx, cpuc->pmu)) + continue; + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } + local_irq_restore(flags); +} + +void x86_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (test_bit(hwc->idx, cpuc->active_mask)) { + static_call(x86_pmu_disable)(event); + __clear_bit(hwc->idx, cpuc->active_mask); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + static_call(x86_pmu_update)(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static void x86_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap); + int i; + + /* + * If we're called during a txn, we only need to undo x86_pmu.add. + * The events never got scheduled and ->cancel_txn will truncate + * the event_list. + * + * XXX assumes any ->del() called during a TXN will only be on + * an event added during that same TXN. + */ + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + goto do_del; + + __set_bit(event->hw.idx, cpuc->dirty); + + /* + * Not a TXN, therefore cleanup properly. + */ + x86_pmu_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < cpuc->n_events; i++) { + if (event == cpuc->event_list[i]) + break; + } + + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ + return; + + /* If we have a newly added event; make sure to decrease n_added. */ + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + + static_call_cond(x86_pmu_put_event_constraints)(cpuc, event); + + /* Delete the array entry. */ + while (++i < cpuc->n_events) { + cpuc->event_list[i-1] = cpuc->event_list[i]; + cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + } + cpuc->event_constraint[i-1] = NULL; + --cpuc->n_events; + if (intel_cap.perf_metrics) + del_nr_metric_event(cpuc, event); + + perf_event_update_userpage(event); + +do_del: + + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. + */ + static_call_cond(x86_pmu_del)(event); +} + +int x86_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + int idx, handled = 0; + u64 val; + + cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * Some chipsets need to unmask the LVTPC in a particular spot + * inside the nmi handler. As a result, the unmasking was pushed + * into all the nmi handlers. + * + * This generic handler doesn't seem to have any issues where the + * unmasking occurs so it was left at the top. + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + + val = static_call(x86_pmu_update)(event); + if (val & (1ULL << (x86_pmu.cntval_bits - 1))) + continue; + + /* + * event overflow + */ + handled++; + + if (!static_call(x86_pmu_set_period)(event)) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (has_branch_stack(event)) { + data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +void perf_events_lapic_init(void) +{ + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + + /* + * Always use NMI for PMU + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); +} + +static int +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) +{ + u64 start_clock; + u64 finish_clock; + int ret; + + /* + * All PMUs/events that share this PMI handler should make sure to + * increment active_events for their events. + */ + if (!atomic_read(&active_events)) + return NMI_DONE; + + start_clock = sched_clock(); + ret = static_call(x86_pmu_handle_irq)(regs); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + + return ret; +} +NOKPROBE_SYMBOL(perf_event_nmi_handler); + +struct event_constraint emptyconstraint; +struct event_constraint unconstrained; + +static int x86_pmu_prepare_cpu(unsigned int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int i; + + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) + cpuc->kfree_on_online[i] = NULL; + if (x86_pmu.cpu_prepare) + return x86_pmu.cpu_prepare(cpu); + return 0; +} + +static int x86_pmu_dead_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dead) + x86_pmu.cpu_dead(cpu); + return 0; +} + +static int x86_pmu_online_cpu(unsigned int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int i; + + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { + kfree(cpuc->kfree_on_online[i]); + cpuc->kfree_on_online[i] = NULL; + } + return 0; +} + +static int x86_pmu_starting_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_starting) + x86_pmu.cpu_starting(cpu); + return 0; +} + +static int x86_pmu_dying_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dying) + x86_pmu.cpu_dying(cpu); + return 0; +} + +static void __init pmu_check_apic(void) +{ + if (boot_cpu_has(X86_FEATURE_APIC)) + return; + + x86_pmu.apic = 0; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + +} + +static struct attribute_group x86_pmu_format_group __ro_after_init = { + .name = "format", + .attrs = NULL, +}; + +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + u64 config = 0; + + if (pmu_attr->id < x86_pmu.max_events) + config = x86_pmu.event_map(pmu_attr->id); + + /* string trumps id */ + if (pmu_attr->event_str) + return sprintf(page, "%s\n", pmu_attr->event_str); + + return x86_pmu.events_sysfs_show(page, config); +} +EXPORT_SYMBOL_GPL(events_sysfs_show); + +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? + pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + +ssize_t events_hybrid_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_hybrid_attr, attr); + struct x86_hybrid_pmu *pmu; + const char *str, *next_str; + int i; + + if (hweight64(pmu_attr->pmu_type) == 1) + return sprintf(page, "%s", pmu_attr->event_str); + + /* + * Hybrid PMUs may support the same event name, but with different + * event encoding, e.g., the mem-loads event on an Atom PMU has + * different event encoding from a Core PMU. + * + * The event_str includes all event encodings. Each event encoding + * is divided by ";". The order of the event encodings must follow + * the order of the hybrid PMU index. + */ + pmu = container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + str = pmu_attr->event_str; + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + continue; + if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + next_str = strchr(str, ';'); + if (next_str) + return snprintf(page, next_str - str + 1, "%s", str); + else + return sprintf(page, "%s", str); + } + str = strchr(str, ';'); + str++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(events_hybrid_sysfs_show); + +EVENT_ATTR(cpu-cycles, CPU_CYCLES ); +EVENT_ATTR(instructions, INSTRUCTIONS ); +EVENT_ATTR(cache-references, CACHE_REFERENCES ); +EVENT_ATTR(cache-misses, CACHE_MISSES ); +EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS ); +EVENT_ATTR(branch-misses, BRANCH_MISSES ); +EVENT_ATTR(bus-cycles, BUS_CYCLES ); +EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND ); +EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND ); +EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); + +static struct attribute *empty_attrs; + +static struct attribute *events_attr[] = { + EVENT_PTR(CPU_CYCLES), + EVENT_PTR(INSTRUCTIONS), + EVENT_PTR(CACHE_REFERENCES), + EVENT_PTR(CACHE_MISSES), + EVENT_PTR(BRANCH_INSTRUCTIONS), + EVENT_PTR(BRANCH_MISSES), + EVENT_PTR(BUS_CYCLES), + EVENT_PTR(STALLED_CYCLES_FRONTEND), + EVENT_PTR(STALLED_CYCLES_BACKEND), + EVENT_PTR(REF_CPU_CYCLES), + NULL, +}; + +/* + * Remove all undefined events (x86_pmu.event_map(id) == 0) + * out of events_attr attributes. + */ +static umode_t +is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct perf_pmu_events_attr *pmu_attr; + + if (idx >= x86_pmu.max_events) + return 0; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + /* str trumps id */ + return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; +} + +static struct attribute_group x86_pmu_events_group __ro_after_init = { + .name = "events", + .attrs = events_attr, + .is_visible = is_visible, +}; + +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) +{ + u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; + u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; + bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE); + bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL); + bool any = (config & ARCH_PERFMON_EVENTSEL_ANY); + bool inv = (config & ARCH_PERFMON_EVENTSEL_INV); + ssize_t ret; + + /* + * We have whole page size to spend and just little data + * to write, so we can safely use sprintf. + */ + ret = sprintf(page, "event=0x%02llx", event); + + if (umask) + ret += sprintf(page + ret, ",umask=0x%02llx", umask); + + if (edge) + ret += sprintf(page + ret, ",edge"); + + if (pc) + ret += sprintf(page + ret, ",pc"); + + if (any) + ret += sprintf(page + ret, ",any"); + + if (inv) + ret += sprintf(page + ret, ",inv"); + + if (cmask) + ret += sprintf(page + ret, ",cmask=0x%02llx", cmask); + + ret += sprintf(page + ret, "\n"); + + return ret; +} + +static struct attribute_group x86_pmu_attr_group; +static struct attribute_group x86_pmu_caps_group; + +static void x86_pmu_static_call_update(void) +{ + static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq); + static_call_update(x86_pmu_disable_all, x86_pmu.disable_all); + static_call_update(x86_pmu_enable_all, x86_pmu.enable_all); + static_call_update(x86_pmu_enable, x86_pmu.enable); + static_call_update(x86_pmu_disable, x86_pmu.disable); + + static_call_update(x86_pmu_assign, x86_pmu.assign); + + static_call_update(x86_pmu_add, x86_pmu.add); + static_call_update(x86_pmu_del, x86_pmu.del); + static_call_update(x86_pmu_read, x86_pmu.read); + + static_call_update(x86_pmu_set_period, x86_pmu.set_period); + static_call_update(x86_pmu_update, x86_pmu.update); + static_call_update(x86_pmu_limit_period, x86_pmu.limit_period); + + static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); + static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); + static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); + + static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling); + static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling); + static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); + + static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); + static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); + + static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); + static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); + + static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); +} + +static void _x86_pmu_read(struct perf_event *event) +{ + static_call(x86_pmu_update)(event); +} + +void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, + u64 intel_ctrl) +{ + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.cntval_bits); + pr_info("... generic registers: %d\n", num_counters); + pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %lu\n", + hweight64((((1ULL << num_counters_fixed) - 1) + << INTEL_PMC_IDX_FIXED) & intel_ctrl)); + pr_info("... event mask: %016Lx\n", intel_ctrl); +} + +/* + * The generic code is not hybrid friendly. The hybrid_pmu->pmu + * of the first registered PMU is unconditionally assigned to + * each possible cpuctx->ctx.pmu. + * Update the correct hybrid PMU to the cpuctx->ctx.pmu. + */ +void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu) +{ + struct perf_cpu_context *cpuctx; + + if (!pmu->pmu_cpu_context) + return; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + cpuctx->ctx.pmu = pmu; +} + +static int __init init_hw_perf_events(void) +{ + struct x86_pmu_quirk *quirk; + int err; + + pr_info("Performance Events: "); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + err = intel_pmu_init(); + break; + case X86_VENDOR_AMD: + err = amd_pmu_init(); + break; + case X86_VENDOR_HYGON: + err = amd_pmu_init(); + x86_pmu.name = "HYGON"; + break; + case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: + err = zhaoxin_pmu_init(); + break; + default: + err = -ENOTSUPP; + } + if (err != 0) { + pr_cont("no PMU driver, software events only.\n"); + err = 0; + goto out_bad_pmu; + } + + pmu_check_apic(); + + /* sanity check that the hardware exists or is emulated */ + if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed)) + goto out_bad_pmu; + + pr_cont("%s PMU driver.\n", x86_pmu.name); + + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) + quirk->func(); + + if (!x86_pmu.intel_ctrl) + x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + + perf_events_lapic_init(); + register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); + + unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, + 0, x86_pmu.num_counters, 0, 0); + + x86_pmu_format_group.attrs = x86_pmu.format_attrs; + + if (!x86_pmu.events_sysfs_show) + x86_pmu_events_group.attrs = &empty_attrs; + + pmu.attr_update = x86_pmu.attr_update; + + if (!is_hybrid()) { + x86_pmu_show_pmu_cap(x86_pmu.num_counters, + x86_pmu.num_counters_fixed, + x86_pmu.intel_ctrl); + } + + if (!x86_pmu.read) + x86_pmu.read = _x86_pmu_read; + + if (!x86_pmu.guest_get_msrs) + x86_pmu.guest_get_msrs = (void *)&__static_call_return0; + + if (!x86_pmu.set_period) + x86_pmu.set_period = x86_perf_event_set_period; + + if (!x86_pmu.update) + x86_pmu.update = x86_perf_event_update; + + x86_pmu_static_call_update(); + + /* + * Install callbacks. Core will call them for each online + * cpu. + */ + err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare", + x86_pmu_prepare_cpu, x86_pmu_dead_cpu); + if (err) + return err; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, + "perf/x86:starting", x86_pmu_starting_cpu, + x86_pmu_dying_cpu); + if (err) + goto out; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online", + x86_pmu_online_cpu, NULL); + if (err) + goto out1; + + if (!is_hybrid()) { + err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + if (err) + goto out2; + } else { + u8 cpu_type = get_this_hybrid_cpu_type(); + struct x86_hybrid_pmu *hybrid_pmu; + int i, j; + + if (!cpu_type && x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + hybrid_pmu = &x86_pmu.hybrid_pmu[i]; + + hybrid_pmu->pmu = pmu; + hybrid_pmu->pmu.type = -1; + hybrid_pmu->pmu.attr_update = x86_pmu.attr_update; + hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS; + hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; + + err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, + (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + if (err) + break; + + if (cpu_type == hybrid_pmu->cpu_type) + x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id()); + } + + if (i < x86_pmu.num_hybrid_pmus) { + for (j = 0; j < i; j++) + perf_pmu_unregister(&x86_pmu.hybrid_pmu[j].pmu); + pr_warn("Failed to register hybrid PMUs\n"); + kfree(x86_pmu.hybrid_pmu); + x86_pmu.hybrid_pmu = NULL; + x86_pmu.num_hybrid_pmus = 0; + goto out2; + } + } + + return 0; + +out2: + cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE); +out1: + cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); +out: + cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); +out_bad_pmu: + memset(&x86_pmu, 0, sizeof(x86_pmu)); + return err; +} +early_initcall(init_hw_perf_events); + +static void x86_pmu_read(struct perf_event *event) +{ + static_call(x86_pmu_read)(event); +} + +/* + * Start group events scheduling transaction + * Set the flag to make pmu::enable() not perform the + * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. + */ +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ + + cpuc->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + + perf_pmu_disable(pmu); + __this_cpu_write(cpu_hw_events.n_txn, 0); + __this_cpu_write(cpu_hw_events.n_txn_pair, 0); + __this_cpu_write(cpu_hw_events.n_txn_metric, 0); +} + +/* + * Stop group events scheduling transaction + * Clear the flag and pmu::enable() will perform the + * schedulability test. + */ +static void x86_pmu_cancel_txn(struct pmu *pmu) +{ + unsigned int txn_flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + txn_flags = cpuc->txn_flags; + cpuc->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + + /* + * Truncate collected array by the number of events added in this + * transaction. See x86_pmu_add() and x86_pmu_*_txn(). + */ + __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); + __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); + __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair)); + __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric)); + perf_pmu_enable(pmu); +} + +/* + * Commit group events scheduling transaction + * Perform the group schedulability test as a whole + * Return 0 if success + * + * Does not cancel the transaction on failure; expects the caller to do this. + */ +static int x86_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int assign[X86_PMC_IDX_MAX]; + int n, ret; + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + + n = cpuc->n_events; + + if (!x86_pmu_initialized()) + return -EAGAIN; + + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); + if (ret) + return ret; + + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); + + cpuc->txn_flags = 0; + perf_pmu_enable(pmu); + return 0; +} +/* + * a fake_cpuc is used to validate event groups. Due to + * the extra reg logic, we need to also allocate a fake + * per_core and per_cpu structure. Otherwise, group events + * using extra reg may conflict without the kernel being + * able to catch this when the last event gets added to + * the group. + */ +static void free_fake_cpuc(struct cpu_hw_events *cpuc) +{ + intel_cpuc_finish(cpuc); + kfree(cpuc); +} + +static struct cpu_hw_events *allocate_fake_cpuc(struct pmu *event_pmu) +{ + struct cpu_hw_events *cpuc; + int cpu; + + cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); + if (!cpuc) + return ERR_PTR(-ENOMEM); + cpuc->is_fake = 1; + + if (is_hybrid()) { + struct x86_hybrid_pmu *h_pmu; + + h_pmu = hybrid_pmu(event_pmu); + if (cpumask_empty(&h_pmu->supported_cpus)) + goto error; + cpu = cpumask_first(&h_pmu->supported_cpus); + } else + cpu = raw_smp_processor_id(); + cpuc->pmu = event_pmu; + + if (intel_cpuc_prepare(cpuc, cpu)) + goto error; + + return cpuc; +error: + free_fake_cpuc(cpuc); + return ERR_PTR(-ENOMEM); +} + +/* + * validate that we can schedule this event + */ +static int validate_event(struct perf_event *event) +{ + struct cpu_hw_events *fake_cpuc; + struct event_constraint *c; + int ret = 0; + + fake_cpuc = allocate_fake_cpuc(event->pmu); + if (IS_ERR(fake_cpuc)) + return PTR_ERR(fake_cpuc); + + c = x86_pmu.get_event_constraints(fake_cpuc, 0, event); + + if (!c || !c->weight) + ret = -EINVAL; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(fake_cpuc, event); + + free_fake_cpuc(fake_cpuc); + + return ret; +} + +/* + * validate a single event group + * + * validation include: + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters + * + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ +static int validate_group(struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct cpu_hw_events *fake_cpuc; + int ret = -EINVAL, n; + + /* + * Reject events from different hybrid PMUs. + */ + if (is_hybrid()) { + struct perf_event *sibling; + struct pmu *pmu = NULL; + + if (is_x86_event(leader)) + pmu = leader->pmu; + + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) + continue; + if (!pmu) + pmu = sibling->pmu; + else if (pmu != sibling->pmu) + return ret; + } + } + + fake_cpuc = allocate_fake_cpuc(event->pmu); + if (IS_ERR(fake_cpuc)) + return PTR_ERR(fake_cpuc); + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + n = collect_events(fake_cpuc, leader, true); + if (n < 0) + goto out; + + fake_cpuc->n_events = n; + n = collect_events(fake_cpuc, event, false); + if (n < 0) + goto out; + + fake_cpuc->n_events = 0; + ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); + +out: + free_fake_cpuc(fake_cpuc); + return ret; +} + +static int x86_pmu_event_init(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = NULL; + int err; + + if ((event->attr.type != event->pmu->type) && + (event->attr.type != PERF_TYPE_HARDWARE) && + (event->attr.type != PERF_TYPE_HW_CACHE)) + return -ENOENT; + + if (is_hybrid() && (event->cpu != -1)) { + pmu = hybrid_pmu(event->pmu); + if (!cpumask_test_cpu(event->cpu, &pmu->supported_cpus)) + return -ENOENT; + } + + err = __x86_pmu_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + else + err = validate_event(event); + } + if (err) { + if (event->destroy) + event->destroy(event); + event->destroy = NULL; + } + + if (READ_ONCE(x86_pmu.attr_rdpmc) && + !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; + + return err; +} + +void perf_clear_dirty_counters(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + /* Don't need to clear the assigned counter. */ + for (i = 0; i < cpuc->n_events; i++) + __clear_bit(cpuc->assign[i], cpuc->dirty); + + if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX)) + return; + + for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); + } else { + wrmsrl(x86_pmu_event_addr(i), 0); + } + } + + bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); +} + +static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) +{ + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) + return; + + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_lock + * for write. If this changes, we'll need a different solution. + */ + mmap_assert_write_locked(mm); + + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); +} + +static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) +{ + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) + return; + + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); +} + +static int x86_pmu_event_idx(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) + return 0; + + if (is_metric_idx(hwc->idx)) + return INTEL_PMC_FIXED_RDPMC_METRICS + 1; + else + return hwc->event_base_rdpmc + 1; +} + +static ssize_t get_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); +} + +static ssize_t set_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + ssize_t ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val > 2) + return -EINVAL; + + if (x86_pmu.attr_rdpmc_broken) + return -ENOTSUPP; + + if (val != x86_pmu.attr_rdpmc) { + /* + * Changing into or out of never available or always available, + * aka perf-event-bypassing mode. This path is extremely slow, + * but only root can trigger it, so it's okay. + */ + if (val == 0) + static_branch_inc(&rdpmc_never_available_key); + else if (x86_pmu.attr_rdpmc == 0) + static_branch_dec(&rdpmc_never_available_key); + + if (val == 2) + static_branch_inc(&rdpmc_always_available_key); + else if (x86_pmu.attr_rdpmc == 2) + static_branch_dec(&rdpmc_always_available_key); + + on_each_cpu(cr4_update_pce, NULL, 1); + x86_pmu.attr_rdpmc = val; + } + + return count; +} + +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); + +static struct attribute *x86_pmu_attrs[] = { + &dev_attr_rdpmc.attr, + NULL, +}; + +static struct attribute_group x86_pmu_attr_group __ro_after_init = { + .attrs = x86_pmu_attrs, +}; + +static ssize_t max_precise_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); +} + +static DEVICE_ATTR_RO(max_precise); + +static struct attribute *x86_pmu_caps_attrs[] = { + &dev_attr_max_precise.attr, + NULL +}; + +static struct attribute_group x86_pmu_caps_group __ro_after_init = { + .name = "caps", + .attrs = x86_pmu_caps_attrs, +}; + +static const struct attribute_group *x86_pmu_attr_groups[] = { + &x86_pmu_attr_group, + &x86_pmu_format_group, + &x86_pmu_events_group, + &x86_pmu_caps_group, + NULL, +}; + +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + static_call_cond(x86_pmu_sched_task)(ctx, sched_in); +} + +static void x86_pmu_swap_task_ctx(struct perf_event_context *prev, + struct perf_event_context *next) +{ + static_call_cond(x86_pmu_swap_task_ctx)(prev, next); +} + +void perf_check_microcode(void) +{ + if (x86_pmu.check_microcode) + x86_pmu.check_microcode(); +} + +static int x86_pmu_check_period(struct perf_event *event, u64 value) +{ + if (x86_pmu.check_period && x86_pmu.check_period(event, value)) + return -EINVAL; + + if (value && x86_pmu.limit_period) { + s64 left = value; + x86_pmu.limit_period(event, &left); + if (left > value) + return -EINVAL; + } + + return 0; +} + +static int x86_pmu_aux_output_match(struct perf_event *event) +{ + if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT)) + return 0; + + if (x86_pmu.aux_output_match) + return x86_pmu.aux_output_match(event); + + return 0; +} + +static int x86_pmu_filter_match(struct perf_event *event) +{ + if (x86_pmu.filter_match) + return x86_pmu.filter_match(event); + + return 1; +} + +static struct pmu pmu = { + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + + .attr_groups = x86_pmu_attr_groups, + + .event_init = x86_pmu_event_init, + + .event_mapped = x86_pmu_event_mapped, + .event_unmapped = x86_pmu_event_unmapped, + + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, + + .event_idx = x86_pmu_event_idx, + .sched_task = x86_pmu_sched_task, + .swap_task_ctx = x86_pmu_swap_task_ctx, + .check_period = x86_pmu_check_period, + + .aux_output_match = x86_pmu_aux_output_match, + + .filter_match = x86_pmu_filter_match, +}; + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + struct cyc2ns_data data; + u64 offset; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); + userpg->pmc_width = x86_pmu.cntval_bits; + + if (!using_native_sched_clock() || !sched_clock_stable()) + return; + + cyc2ns_read_begin(&data); + + offset = data.cyc2ns_offset + __sched_clock_offset; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always in the local_clock domain. + */ + userpg->cap_user_time = 1; + userpg->time_mult = data.cyc2ns_mul; + userpg->time_shift = data.cyc2ns_shift; + userpg->time_offset = offset - now; + + /* + * cap_user_time_zero doesn't make sense when we're using a different + * time base for the records. + */ + if (!event->attr.use_clockid) { + userpg->cap_user_time_zero = 1; + userpg->time_zero = offset; + } + + cyc2ns_read_end(); +} + +/* + * Determine whether the regs were taken from an irq/exception handler rather + * than from perf_arch_fetch_caller_regs(). + */ +static bool perf_hw_regs(struct pt_regs *regs) +{ + return regs->flags & X86_EFLAGS_FIXED; +} + +void +perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) + unwind_start(&state, current, regs, NULL); + else + unwind_start(&state, current, NULL, (void *)regs->sp); + + for (; !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + return __access_ok(fp, size); +} + +static unsigned long get_segment_base(unsigned int segment) +{ + struct desc_struct *desc; + unsigned int idx = segment >> 3; + + if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = READ_ONCE(current->active_mm->context.ldt); + if (!ldt || idx >= ldt->nr_entries) + return 0; + + desc = &ldt->entries[idx]; +#else + return 0; +#endif + } else { + if (idx >= GDT_ENTRIES) + return 0; + + desc = raw_cpu_ptr(gdt_page.gdt) + idx; + } + + return get_desc_base(desc); +} + +#ifdef CONFIG_IA32_EMULATION + +#include + +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) +{ + /* 32-bit process in 64-bit kernel. */ + unsigned long ss_base, cs_base; + struct stack_frame_ia32 frame; + const struct stack_frame_ia32 __user *fp; + + if (user_64bit_mode(regs)) + return 0; + + cs_base = get_segment_base(regs->cs); + ss_base = get_segment_base(regs->ss); + + fp = compat_ptr(ss_base + regs->bp); + pagefault_disable(); + while (entry->nr < entry->max_stack) { + if (!valid_user_frame(fp, sizeof(frame))) + break; + + if (__get_user(frame.next_frame, &fp->next_frame)) + break; + if (__get_user(frame.return_address, &fp->return_address)) + break; + + perf_callchain_store(entry, cs_base + frame.return_address); + fp = compat_ptr(ss_base + frame.next_frame); + } + pagefault_enable(); + return 1; +} +#else +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) +{ + return 0; +} +#endif + +void +perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) +{ + struct stack_frame frame; + const struct stack_frame __user *fp; + + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + /* + * We don't know what to do with VM86 stacks.. ignore them for now. + */ + if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) + return; + + fp = (void __user *)regs->bp; + + perf_callchain_store(entry, regs->ip); + + if (!nmi_uaccess_okay()) + return; + + if (perf_callchain_user32(regs, entry)) + return; + + pagefault_disable(); + while (entry->nr < entry->max_stack) { + if (!valid_user_frame(fp, sizeof(frame))) + break; + + if (__get_user(frame.next_frame, &fp->next_frame)) + break; + if (__get_user(frame.return_address, &fp->return_address)) + break; + + perf_callchain_store(entry, frame.return_address); + fp = (void __user *)frame.next_frame; + } + pagefault_enable(); +} + +/* + * Deal with code segment offsets for the various execution modes: + * + * VM86 - the good olde 16 bit days, where the linear address is + * 20 bits and we use regs->ip + 0x10 * regs->cs. + * + * IA32 - Where we need to look at GDT/LDT segment descriptor tables + * to figure out what the 32bit base address is. + * + * X32 - has TIF_X32 set, but is running in x86_64 + * + * X86_64 - CS,DS,SS,ES are all zero based. + */ +static unsigned long code_segment_base(struct pt_regs *regs) +{ + /* + * For IA32 we look at the GDT/LDT segment base to convert the + * effective IP to a linear address. + */ + +#ifdef CONFIG_X86_32 + /* + * If we are in VM86 mode, add the segment offset to convert to a + * linear address. + */ + if (regs->flags & X86_VM_MASK) + return 0x10 * regs->cs; + + if (user_mode(regs) && regs->cs != __USER_CS) + return get_segment_base(regs->cs); +#else + if (user_mode(regs) && !user_64bit_mode(regs) && + regs->cs != __USER32_CS) + return get_segment_base(regs->cs); +#endif + return 0; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_state()) + return perf_guest_get_ip(); + + return regs->ip + code_segment_base(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + unsigned int guest_state = perf_guest_state(); + int misc = 0; + + if (guest_state) { + if (guest_state & PERF_GUEST_USER) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + if (regs->flags & PERF_EFLAGS_EXACT) + misc |= PERF_RECORD_MISC_EXACT_IP; + + return misc; +} + +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + /* This API doesn't currently support enumerating hybrid PMUs. */ + if (WARN_ON_ONCE(cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) || + !x86_pmu_initialized()) { + memset(cap, 0, sizeof(*cap)); + return; + } + + /* + * Note, hybrid CPU models get tracked as having hybrid PMUs even when + * all E-cores are disabled via BIOS. When E-cores are disabled, the + * base PMU holds the correct number of counters for P-cores. + */ + cap->version = x86_pmu.version; + cap->num_counters_gp = x86_pmu.num_counters; + cap->num_counters_fixed = x86_pmu.num_counters_fixed; + cap->bit_width_gp = x86_pmu.cntval_bits; + cap->bit_width_fixed = x86_pmu.cntval_bits; + cap->events_mask = (unsigned int)x86_pmu.events_maskl; + cap->events_mask_len = x86_pmu.events_mask_len; + cap->pebs_ept = x86_pmu.pebs_ept; +} +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); + +u64 perf_get_hw_event_config(int hw_event) +{ + int max = x86_pmu.max_events; + + if (hw_event < max) + return x86_pmu.event_map(array_index_nospec(hw_event, max)); + + return 0; +} +EXPORT_SYMBOL_GPL(perf_get_hw_event_config); diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile new file mode 100644 index 000000000..10bde6c5a --- /dev/null +++ b/arch/x86/events/intel/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o +obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o +intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o uncore_discovery.o +obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o +intel-cstate-objs := cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c new file mode 100644 index 000000000..974e917e6 --- /dev/null +++ b/arch/x86/events/intel/bts.c @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * BTS PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + */ + +#undef DEBUG + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../perf_event.h" + +struct bts_ctx { + struct perf_output_handle handle; + struct debug_store ds_back; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, +}; + +static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); + +#define BTS_RECORD_SIZE 24 +#define BTS_SAFETY_MARGIN 4080 + +struct bts_phys { + struct page *page; + unsigned long size; + unsigned long offset; + unsigned long displacement; +}; + +struct bts_buffer { + size_t real_size; /* multiple of BTS_RECORD_SIZE */ + unsigned int nr_pages; + unsigned int nr_bufs; + unsigned int cur_buf; + bool snapshot; + local_t data_size; + local_t head; + unsigned long end; + void **data_pages; + struct bts_phys buf[]; +}; + +static struct pmu bts_pmu; + +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + +static size_t buf_size(struct page *page) +{ + return buf_nr_pages(page) * PAGE_SIZE; +} + +static void * +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) +{ + struct bts_buffer *buf; + struct page *page; + int cpu = event->cpu; + int node = (cpu == -1) ? cpu : cpu_to_node(cpu); + unsigned long offset; + size_t size = nr_pages << PAGE_SHIFT; + int pg, nbuf, pad; + + /* count all the high order buffers */ + for (pg = 0, nbuf = 0; pg < nr_pages;) { + page = virt_to_page(pages[pg]); + pg += buf_nr_pages(page); + nbuf++; + } + + /* + * to avoid interrupts in overwrite mode, only allow one physical + */ + if (overwrite && nbuf > 1) + return NULL; + + buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->nr_pages = nr_pages; + buf->nr_bufs = nbuf; + buf->snapshot = overwrite; + buf->data_pages = pages; + buf->real_size = size - size % BTS_RECORD_SIZE; + + for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { + unsigned int __nr_pages; + + page = virt_to_page(pages[pg]); + __nr_pages = buf_nr_pages(page); + buf->buf[nbuf].page = page; + buf->buf[nbuf].offset = offset; + buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); + buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; + buf->buf[nbuf].size -= pad; + + pg += __nr_pages; + offset += __nr_pages << PAGE_SHIFT; + } + + return buf; +} + +static void bts_buffer_free_aux(void *data) +{ + kfree(data); +} + +static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) +{ + return buf->buf[idx].offset + buf->buf[idx].displacement; +} + +static void +bts_config_buffer(struct bts_buffer *buf) +{ + int cpu = raw_smp_processor_id(); + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct bts_phys *phys = &buf->buf[buf->cur_buf]; + unsigned long index, thresh = 0, end = phys->size; + struct page *page = phys->page; + + index = local_read(&buf->head); + + if (!buf->snapshot) { + if (buf->end < phys->offset + buf_size(page)) + end = buf->end - phys->offset - phys->displacement; + + index -= phys->offset + phys->displacement; + + if (end - index > BTS_SAFETY_MARGIN) + thresh = end - BTS_SAFETY_MARGIN; + else if (end - index > BTS_RECORD_SIZE) + thresh = end - BTS_RECORD_SIZE; + else + thresh = end; + } + + ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement; + ds->bts_index = ds->bts_buffer_base + index; + ds->bts_absolute_maximum = ds->bts_buffer_base + end; + ds->bts_interrupt_threshold = !buf->snapshot + ? ds->bts_buffer_base + thresh + : ds->bts_absolute_maximum + BTS_RECORD_SIZE; +} + +static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head) +{ + unsigned long index = head - phys->offset; + + memset(page_address(phys->page) + index, 0, phys->size - index); +} + +static void bts_update(struct bts_ctx *bts) +{ + int cpu = raw_smp_processor_id(); + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct bts_buffer *buf = perf_get_aux(&bts->handle); + unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; + + if (!buf) + return; + + head = index + bts_buffer_offset(buf, buf->cur_buf); + old = local_xchg(&buf->head, head); + + if (!buf->snapshot) { + if (old == head) + return; + + if (ds->bts_index >= ds->bts_absolute_maximum) + perf_aux_output_flag(&bts->handle, + PERF_AUX_FLAG_TRUNCATED); + + /* + * old and head are always in the same physical buffer, so we + * can subtract them to get the data size. + */ + local_add(head - old, &buf->data_size); + } else { + local_set(&buf->data_size, head); + } + + /* + * Since BTS is coherent, just add compiler barrier to ensure + * BTS updating is ordered against bts::handle::event. + */ + barrier(); +} + +static int +bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); + +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + +static void __bts_event_start(struct perf_event *event) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = perf_get_aux(&bts->handle); + u64 config = 0; + + if (!buf->snapshot) + config |= ARCH_PERFMON_EVENTSEL_INT; + if (!event->attr.exclude_kernel) + config |= ARCH_PERFMON_EVENTSEL_OS; + if (!event->attr.exclude_user) + config |= ARCH_PERFMON_EVENTSEL_USR; + + bts_config_buffer(buf); + + /* + * local barrier to make sure that ds configuration made it + * before we enable BTS and bts::state goes ACTIVE + */ + wmb(); + + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + + intel_pmu_enable_bts(config); + +} + +static void bts_event_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf; + + buf = perf_aux_output_begin(&bts->handle, event); + if (!buf) + goto fail_stop; + + if (bts_buffer_reset(buf, &bts->handle)) + goto fail_end_stop; + + bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; + bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; + bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; + + perf_event_itrace_started(event); + event->hw.state = 0; + + __bts_event_start(event); + + return; + +fail_end_stop: + perf_aux_output_end(&bts->handle, 0); + +fail_stop: + event->hw.state = PERF_HES_STOPPED; +} + +static void __bts_event_stop(struct perf_event *event, int state) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + + /* + * No extra synchronization is mandated by the documentation to have + * BTS data stores globally visible. + */ + intel_pmu_disable_bts(); +} + +static void bts_event_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); + + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); + + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); + + event->hw.state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_UPDATE) { + bts_update(bts); + + if (buf) { + if (buf->snapshot) + bts->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, + local_xchg(&buf->data_size, 0)); + } + + cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; + cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; + } +} + +void intel_bts_enable_local(void) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); + + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; + + if (bts->handle.event) + __bts_event_start(bts->handle.event); +} + +void intel_bts_disable_local(void) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. + */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + + if (bts->handle.event) + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); +} + +static int +bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) +{ + unsigned long head, space, next_space, pad, gap, skip, wakeup; + unsigned int next_buf; + struct bts_phys *phys, *next_phys; + int ret; + + if (buf->snapshot) + return 0; + + head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + + phys = &buf->buf[buf->cur_buf]; + space = phys->offset + phys->displacement + phys->size - head; + pad = space; + if (space > handle->size) { + space = handle->size; + space -= space % BTS_RECORD_SIZE; + } + if (space <= BTS_SAFETY_MARGIN) { + /* See if next phys buffer has more space */ + next_buf = buf->cur_buf + 1; + if (next_buf >= buf->nr_bufs) + next_buf = 0; + next_phys = &buf->buf[next_buf]; + gap = buf_size(phys->page) - phys->displacement - phys->size + + next_phys->displacement; + skip = pad + gap; + if (handle->size >= skip) { + next_space = next_phys->size; + if (next_space + skip > handle->size) { + next_space = handle->size - skip; + next_space -= next_space % BTS_RECORD_SIZE; + } + if (next_space > space || !space) { + if (pad) + bts_buffer_pad_out(phys, head); + ret = perf_aux_output_skip(handle, skip); + if (ret) + return ret; + /* Advance to next phys buffer */ + phys = next_phys; + space = next_space; + head = phys->offset + phys->displacement; + /* + * After this, cur_buf and head won't match ds + * anymore, so we must not be racing with + * bts_update(). + */ + buf->cur_buf = next_buf; + local_set(&buf->head, head); + } + } + } + + /* Don't go far beyond wakeup watermark */ + wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup - + handle->head; + if (space > wakeup) { + space = wakeup; + space -= space % BTS_RECORD_SIZE; + } + + buf->end = head + space; + + /* + * If we have no space, the lost notification would have been sent when + * we hit absolute_maximum - see bts_update() + */ + if (!space) + return -ENOSPC; + + return 0; +} + +int intel_bts_interrupt(void) +{ + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct perf_event *event = bts->handle.event; + struct bts_buffer *buf; + s64 old_head; + int err = -ENOSPC, handled = 0; + + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. + */ + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) + handled = 1; + + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) + return handled; + + buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + + /* + * Skip snapshot counters: they don't use the interrupt, but + * there's no other way of telling, because the pointer will + * keep moving + */ + if (buf->snapshot) + return 0; + + old_head = local_read(&buf->head); + bts_update(bts); + + /* no new data */ + if (old_head == local_read(&buf->head)) + return handled; + + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); + + buf = perf_aux_output_begin(&bts->handle, event); + if (buf) + err = bts_buffer_reset(buf, &bts->handle); + + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); + + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0); + } + } + + return 1; +} + +static void bts_event_del(struct perf_event *event, int mode) +{ + bts_event_stop(event, PERF_EF_UPDATE); +} + +static int bts_event_add(struct perf_event *event, int mode) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + event->hw.state = PERF_HES_STOPPED; + + if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + return -EBUSY; + + if (bts->handle.event) + return -EBUSY; + + if (mode & PERF_EF_START) { + bts_event_start(event, 0); + if (hwc->state & PERF_HES_STOPPED) + return -EINVAL; + } + + return 0; +} + +static void bts_event_destroy(struct perf_event *event) +{ + x86_release_hardware(); + x86_del_exclusive(x86_lbr_exclusive_bts); +} + +static int bts_event_init(struct perf_event *event) +{ + int ret; + + if (event->attr.type != bts_pmu.type) + return -ENOENT; + + /* + * BTS leaks kernel addresses even when CPL0 tracing is + * disabled, so disallow intel_bts driver for unprivileged + * users on paranoid systems since it provides trace data + * to the user in a zero-copy fashion. + * + * Note that the default paranoia setting permits unprivileged + * users to profile the kernel. + */ + if (event->attr.exclude_kernel) { + ret = perf_allow_kernel(&event->attr); + if (ret) + return ret; + } + + if (x86_add_exclusive(x86_lbr_exclusive_bts)) + return -EBUSY; + + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + + event->destroy = bts_event_destroy; + + return 0; +} + +static void bts_event_read(struct perf_event *event) +{ +} + +static __init int bts_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + return -ENODEV; + + if (boot_cpu_has(X86_FEATURE_PTI)) { + /* + * BTS hardware writes through a virtual memory map we must + * either use the kernel physical map, or the user mapping of + * the AUX buffer. + * + * However, since this driver supports per-CPU and per-task inherit + * we cannot use the user mapping since it will not be available + * if we're not running the owning process. + * + * With PTI we can't use the kernel map either, because its not + * there when we run userspace. + * + * For now, disable this driver when using PTI. + */ + return -ENODEV; + } + + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; + bts_pmu.task_ctx_nr = perf_sw_context; + bts_pmu.event_init = bts_event_init; + bts_pmu.add = bts_event_add; + bts_pmu.del = bts_event_del; + bts_pmu.start = bts_event_start; + bts_pmu.stop = bts_event_stop; + bts_pmu.read = bts_event_read; + bts_pmu.setup_aux = bts_buffer_setup_aux; + bts_pmu.free_aux = bts_buffer_free_aux; + + return perf_pmu_register(&bts_pmu, "intel_bts", -1); +} +arch_initcall(bts_init); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c new file mode 100644 index 000000000..949129443 --- /dev/null +++ b/arch/x86/events/intel/core.c @@ -0,0 +1,6668 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Per core/cpu state + * + * Used to coordinate shared registers between HT threads or + * among events on a single PMU. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../perf_event.h" + +/* + * Intel PerfMon, used on Core and later. + */ +static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ +}; + +static struct event_constraint intel_core_event_constraints[] __read_mostly = +{ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_core2_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ + INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = +{ + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_westmere_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_snb_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_ivb_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */ + INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_westmere_extra_regs[] __read_mostly = +{ + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_v1_event_constraints[] __read_mostly = +{ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_gen_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + FIXED_EVENT_CONSTRAINT(0x0500, 4), + FIXED_EVENT_CONSTRAINT(0x0600, 5), + FIXED_EVENT_CONSTRAINT(0x0700, 6), + FIXED_EVENT_CONSTRAINT(0x0800, 7), + FIXED_EVENT_CONSTRAINT(0x0900, 8), + FIXED_EVENT_CONSTRAINT(0x0a00, 9), + FIXED_EVENT_CONSTRAINT(0x0b00, 10), + FIXED_EVENT_CONSTRAINT(0x0c00, 11), + FIXED_EVENT_CONSTRAINT(0x0d00, 12), + FIXED_EVENT_CONSTRAINT(0x0e00, 13), + FIXED_EVENT_CONSTRAINT(0x0f00, 14), + FIXED_EVENT_CONSTRAINT(0x1000, 15), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_slm_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_skl_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_knl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snb_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_skl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + /* + * Note the low 8 bits eventsel code is not a continuous field, containing + * some #GPing bits. These are masked out. + */ + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_icl_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), + INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_icl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_spr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_spr_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + + INTEL_EVENT_CONSTRAINT(0x2e, 0xff), + INTEL_EVENT_CONSTRAINT(0x3c, 0xff), + /* + * Generally event codes < 0x90 are restricted to counters 0-3. + * The 0x2E and 0x3C are exception, which has no restriction. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1), + INTEL_EVENT_CONSTRAINT(0xce, 0x1), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), + /* + * Generally event codes >= 0x90 are likely to have no restrictions. + * The exception are defined as above. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff), + + EVENT_CONSTRAINT_END +}; + + +EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); +EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); + +static struct attribute *nhm_mem_events_attrs[] = { + EVENT_PTR(mem_ld_nhm), + NULL, +}; + +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4"); +EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81"); +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83"); +EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84"); +EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85"); +EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86"); +EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87"); + +static struct attribute *snb_events_attrs[] = { + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), + NULL, +}; + +static struct attribute *snb_mem_events_attrs[] = { + EVENT_PTR(mem_ld_snb), + EVENT_PTR(mem_st_snb), + NULL, +}; + +static struct event_constraint intel_hsw_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), + /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), + /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_bdw_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + +static u64 intel_pmu_event_map(int hw_event) +{ + return intel_perfmon_event_map[hw_event]; +} + +static __initconst const u64 spr_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe124, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_MISS) ] = 0xe424, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe12, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + [ C(RESULT_MISS) ] = 0xe13, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = 0xe11, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4c4, + [ C(RESULT_MISS) ] = 0x4c5, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, +}; + +static __initconst const u64 spr_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10001, + [ C(RESULT_MISS) ] = 0x3fbfc00001, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x3f3ffc0002, + [ C(RESULT_MISS) ] = 0x3f3fc00002, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10c000001, + [ C(RESULT_MISS) ] = 0x3fb3000001, + }, + }, +}; + +/* + * Notes on the events: + * - data reads do not include code reads (comparable to earlier tables) + * - data counts include speculative execution (except L1 write, dtlb, bpu) + * - remote node access includes remote memory, remote cache, remote mmio. + * - prefetches are not included in the counts. + * - icache miss does not include decoded icache + */ + +#define SKL_DEMAND_DATA_RD BIT_ULL(0) +#define SKL_DEMAND_RFO BIT_ULL(1) +#define SKL_ANY_RESPONSE BIT_ULL(16) +#define SKL_SUPPLIER_NONE BIT_ULL(17) +#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26) +#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27) +#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28) +#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29) +#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \ + SKL_L3_MISS_REMOTE_HOP0_DRAM| \ + SKL_L3_MISS_REMOTE_HOP1_DRAM| \ + SKL_L3_MISS_REMOTE_HOP2P_DRAM) +#define SKL_SPL_HIT BIT_ULL(30) +#define SKL_SNOOP_NONE BIT_ULL(31) +#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32) +#define SKL_SNOOP_MISS BIT_ULL(33) +#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34) +#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35) +#define SKL_SNOOP_HITM BIT_ULL(36) +#define SKL_SNOOP_NON_DRAM BIT_ULL(37) +#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \ + SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ + SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ + SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM) +#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD +#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \ + SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ + SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ + SKL_SNOOP_HITM|SKL_SPL_HIT) +#define SKL_DEMAND_WRITE SKL_DEMAND_RFO +#define SKL_LLC_ACCESS SKL_ANY_RESPONSE +#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \ + SKL_L3_MISS_REMOTE_HOP1_DRAM| \ + SKL_L3_MISS_REMOTE_HOP2P_DRAM) + +static __initconst const u64 skl_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + +static __initconst const u64 skl_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| + SKL_LLC_ACCESS|SKL_ANY_SNOOP, + [ C(RESULT_MISS) ] = SKL_DEMAND_READ| + SKL_L3_MISS|SKL_ANY_SNOOP| + SKL_SUPPLIER_NONE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| + SKL_LLC_ACCESS|SKL_ANY_SNOOP, + [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| + SKL_L3_MISS|SKL_ANY_SNOOP| + SKL_SUPPLIER_NONE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| + SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, + [ C(RESULT_MISS) ] = SKL_DEMAND_READ| + SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| + SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, + [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| + SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + +#define SNB_DMND_DATA_RD (1ULL << 0) +#define SNB_DMND_RFO (1ULL << 1) +#define SNB_DMND_IFETCH (1ULL << 2) +#define SNB_DMND_WB (1ULL << 3) +#define SNB_PF_DATA_RD (1ULL << 4) +#define SNB_PF_RFO (1ULL << 5) +#define SNB_PF_IFETCH (1ULL << 6) +#define SNB_LLC_DATA_RD (1ULL << 7) +#define SNB_LLC_RFO (1ULL << 8) +#define SNB_LLC_IFETCH (1ULL << 9) +#define SNB_BUS_LOCKS (1ULL << 10) +#define SNB_STRM_ST (1ULL << 11) +#define SNB_OTHER (1ULL << 15) +#define SNB_RESP_ANY (1ULL << 16) +#define SNB_NO_SUPP (1ULL << 17) +#define SNB_LLC_HITM (1ULL << 18) +#define SNB_LLC_HITE (1ULL << 19) +#define SNB_LLC_HITS (1ULL << 20) +#define SNB_LLC_HITF (1ULL << 21) +#define SNB_LOCAL (1ULL << 22) +#define SNB_REMOTE (0xffULL << 23) +#define SNB_SNP_NONE (1ULL << 31) +#define SNB_SNP_NOT_NEEDED (1ULL << 32) +#define SNB_SNP_MISS (1ULL << 33) +#define SNB_NO_FWD (1ULL << 34) +#define SNB_SNP_FWD (1ULL << 35) +#define SNB_HITM (1ULL << 36) +#define SNB_NON_DRAM (1ULL << 37) + +#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) +#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) +#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ + SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ + SNB_HITM) + +#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) +#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) + +#define SNB_L3_ACCESS SNB_RESP_ANY +#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) + +static __initconst const u64 snb_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, + }, + }, +}; + +static __initconst const u64 snb_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + +}; + +/* + * Notes on the events: + * - data reads do not include code reads (comparable to earlier tables) + * - data counts include speculative execution (except L1 write, dtlb, bpu) + * - remote node access includes remote memory, remote cache, remote mmio. + * - prefetches are not included in the counts because they are not + * reliably counted. + */ + +#define HSW_DEMAND_DATA_RD BIT_ULL(0) +#define HSW_DEMAND_RFO BIT_ULL(1) +#define HSW_ANY_RESPONSE BIT_ULL(16) +#define HSW_SUPPLIER_NONE BIT_ULL(17) +#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) +#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) +#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) +#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) +#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ + HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ + HSW_L3_MISS_REMOTE_HOP2P) +#define HSW_SNOOP_NONE BIT_ULL(31) +#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) +#define HSW_SNOOP_MISS BIT_ULL(33) +#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) +#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) +#define HSW_SNOOP_HITM BIT_ULL(36) +#define HSW_SNOOP_NON_DRAM BIT_ULL(37) +#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ + HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ + HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ + HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) +#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) +#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD +#define HSW_DEMAND_WRITE HSW_DEMAND_RFO +#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ + HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) +#define HSW_LLC_ACCESS HSW_ANY_RESPONSE + +#define BDW_L3_MISS_LOCAL BIT(26) +#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ + HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ + HSW_L3_MISS_REMOTE_HOP2P) + + +static __initconst const u64 hsw_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + +static __initconst const u64 hsw_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| + HSW_LLC_ACCESS, + [ C(RESULT_MISS) ] = HSW_DEMAND_READ| + HSW_L3_MISS|HSW_ANY_SNOOP, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| + HSW_LLC_ACCESS, + [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS|HSW_ANY_SNOOP, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| + HSW_L3_MISS_LOCAL_DRAM| + HSW_SNOOP_DRAM, + [ C(RESULT_MISS) ] = HSW_DEMAND_READ| + HSW_L3_MISS_REMOTE| + HSW_SNOOP_DRAM, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS_LOCAL_DRAM| + HSW_SNOOP_DRAM, + [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS_REMOTE| + HSW_SNOOP_DRAM, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + +static __initconst const u64 westmere_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, +}; + +/* + * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; + * See IA32 SDM Vol 3B 30.6.1.3 + */ + +#define NHM_DMND_DATA_RD (1 << 0) +#define NHM_DMND_RFO (1 << 1) +#define NHM_DMND_IFETCH (1 << 2) +#define NHM_DMND_WB (1 << 3) +#define NHM_PF_DATA_RD (1 << 4) +#define NHM_PF_DATA_RFO (1 << 5) +#define NHM_PF_IFETCH (1 << 6) +#define NHM_OFFCORE_OTHER (1 << 7) +#define NHM_UNCORE_HIT (1 << 8) +#define NHM_OTHER_CORE_HIT_SNP (1 << 9) +#define NHM_OTHER_CORE_HITM (1 << 10) + /* reserved */ +#define NHM_REMOTE_CACHE_FWD (1 << 12) +#define NHM_REMOTE_DRAM (1 << 13) +#define NHM_LOCAL_DRAM (1 << 14) +#define NHM_NON_DRAM (1 << 15) + +#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_REMOTE (NHM_REMOTE_DRAM) + +#define NHM_DMND_READ (NHM_DMND_DATA_RD) +#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) +#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) + +#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) + +static __initconst const u64 nehalem_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, + }, + }, +}; + +static __initconst const u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, +}; + +static __initconst const u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst const u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + +static struct extra_reg intel_slm_extra_regs[] __read_mostly = +{ + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1), + EVENT_EXTRA_END +}; + +#define SLM_DMND_READ SNB_DMND_DATA_RD +#define SLM_DMND_WRITE SNB_DMND_RFO +#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM) +#define SLM_LLC_ACCESS SNB_RESP_ANY +#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM) + +static __initconst const u64 slm_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS, + }, + }, +}; + +static __initconst const u64 slm_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */ + [ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3"); +/* UOPS_NOT_DELIVERED.ANY */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c"); +/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */ +EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02"); +/* UOPS_RETIRED.ANY */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2"); +/* UOPS_ISSUED.ANY */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e"); + +static struct attribute *glm_events_attrs[] = { + EVENT_PTR(td_total_slots_glm), + EVENT_PTR(td_total_slots_scale_glm), + EVENT_PTR(td_fetch_bubbles_glm), + EVENT_PTR(td_recovery_bubbles_glm), + EVENT_PTR(td_slots_issued_glm), + EVENT_PTR(td_slots_retired_glm), + NULL +}; + +static struct extra_reg intel_glm_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1), + EVENT_EXTRA_END +}; + +#define GLM_DEMAND_DATA_RD BIT_ULL(0) +#define GLM_DEMAND_RFO BIT_ULL(1) +#define GLM_ANY_RESPONSE BIT_ULL(16) +#define GLM_SNP_NONE_OR_MISS BIT_ULL(33) +#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD +#define GLM_DEMAND_WRITE GLM_DEMAND_RFO +#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) +#define GLM_LLC_ACCESS GLM_ANY_RESPONSE +#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM) +#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM) + +static __initconst const u64 glm_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ + [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ + [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +static __initconst const u64 glm_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_READ| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_READ| + GLM_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_WRITE| + GLM_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_MISS, + }, + }, +}; + +static __initconst const u64 glp_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ + [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ + [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +static __initconst const u64 glp_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_READ| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_READ| + GLM_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_WRITE| + GLM_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + +#define TNT_LOCAL_DRAM BIT_ULL(26) +#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD +#define TNT_DEMAND_WRITE GLM_DEMAND_RFO +#define TNT_LLC_ACCESS GLM_ANY_RESPONSE +#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \ + SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM) +#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM) + +static __initconst const u64 tnt_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_READ| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_READ| + TNT_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_WRITE| + TNT_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_tnt, "event=0x71,umask=0x0"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_tnt, "event=0xc2,umask=0x0"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_tnt, "event=0x73,umask=0x6"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_tnt, "event=0x74,umask=0x0"); + +static struct attribute *tnt_events_attrs[] = { + EVENT_PTR(td_fe_bound_tnt), + EVENT_PTR(td_retiring_tnt), + EVENT_PTR(td_bad_spec_tnt), + EVENT_PTR(td_be_bound_tnt), + NULL, +}; + +static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), + EVENT_EXTRA_END +}; + +EVENT_ATTR_STR(mem-loads, mem_ld_grt, "event=0xd0,umask=0x5,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_grt, "event=0xd0,umask=0x6"); + +static struct attribute *grt_mem_attrs[] = { + EVENT_PTR(mem_ld_grt), + EVENT_PTR(mem_st_grt), + NULL +}; + +static struct extra_reg intel_grt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0), + EVENT_EXTRA_END +}; + +#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ +#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ +#define KNL_MCDRAM_LOCAL BIT_ULL(21) +#define KNL_MCDRAM_FAR BIT_ULL(22) +#define KNL_DDR_LOCAL BIT_ULL(23) +#define KNL_DDR_FAR BIT_ULL(24) +#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \ + KNL_DDR_LOCAL | KNL_DDR_FAR) +#define KNL_L2_READ SLM_DMND_READ +#define KNL_L2_WRITE SLM_DMND_WRITE +#define KNL_L2_PREFETCH SLM_DMND_PREFETCH +#define KNL_L2_ACCESS SLM_LLC_ACCESS +#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \ + KNL_DRAM_ANY | SNB_SNP_ANY | \ + SNB_NON_DRAM) + +static __initconst const u64 knl_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS, + [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS, + [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS, + }, + }, +}; + +/* + * Used from PMIs where the LBRs are already disabled. + * + * This function could be called consecutively. It is required to remain in + * disabled state if called consecutively. + * + * During consecutive calls, the same disable value will be written to related + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. + * + * Avoid PEBS_ENABLE MSR access in PMIs. + * The GLOBAL_CTRL has been disabled. All the counters do not count anymore. + * It doesn't matter if the PEBS is enabled or not. + * Usually, the PEBS status are not changed in PMIs. It's unnecessary to + * access PEBS_ENABLE MSR in disable_all()/enable_all(). + * However, there are some cases which may change PEBS status, e.g. PMI + * throttle. The PEBS_ENABLE should be updated where the status changes. + */ +static __always_inline void __intel_pmu_disable_all(bool bts) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); +} + +static __always_inline void intel_pmu_disable_all(void) +{ + __intel_pmu_disable_all(true); + intel_pmu_pebs_disable_all(); + intel_pmu_lbr_disable_all(); +} + +static void __intel_pmu_enable_all(int added, bool pmi) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); + + intel_pmu_lbr_enable_all(pmi); + + if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); + cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; + } + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + intel_ctrl & ~cpuc->intel_ctrl_guest_mask); + + if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_event *event = + cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!event)) + return; + + intel_pmu_enable_bts(event->hw.config); + } +} + +static void intel_pmu_enable_all(int added) +{ + intel_pmu_pebs_enable_all(); + __intel_pmu_enable_all(added, false); +} + +static noinline int +__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, + unsigned int cnt, unsigned long flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + intel_pmu_lbr_read(); + cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr); + + memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); + intel_pmu_enable_all(0); + local_irq_restore(flags); + return cnt; +} + +static int +intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + +static int +intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_arch_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + +/* + * Workaround for: + * Intel Errata AAK100 (model 26) + * Intel Errata AAP53 (model 30) + * Intel Errata BD53 (model 44) + * + * The official story: + * These chips need to be 'reset' when adding counters by programming the + * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either + * in sequence on the same PMC or on different PMCs. + * + * In practice it appears some of these events do in fact count, and + * we need to program all 4 events. + */ +static void intel_pmu_nhm_workaround(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + static const unsigned long nhm_magic[4] = { + 0x4300B5, + 0x4300D2, + 0x4300B1, + 0x4300B1 + }; + struct perf_event *event; + int i; + + /* + * The Errata requires below steps: + * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; + * 2) Configure 4 PERFEVTSELx with the magic events and clear + * the corresponding PMCx; + * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; + * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; + * 5) Clear 4 pairs of ERFEVTSELx and PMCx; + */ + + /* + * The real steps we choose are a little different from above. + * A) To reduce MSR operations, we don't run step 1) as they + * are already cleared before this function is called; + * B) Call x86_perf_event_update to save PMCx before configuring + * PERFEVTSELx with magic number; + * C) With step 5), we do clear only when the PERFEVTSELx is + * not used currently. + * D) Call x86_perf_event_set_period to restore PMCx; + */ + + /* We always operate 4 pairs of PERF Counters */ + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) + static_call(x86_pmu_update)(event); + } + + for (i = 0; i < 4; i++) { + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + } + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + + if (event) { + static_call(x86_pmu_set_period)(event); + __x86_pmu_enable_event(&event->hw, + ARCH_PERFMON_EVENTSEL_ENABLE); + } else + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); + } +} + +static void intel_pmu_nhm_enable_all(int added) +{ + if (added) + intel_pmu_nhm_workaround(); + intel_pmu_enable_all(added); +} + +static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) +{ + u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0; + + if (cpuc->tfa_shadow != val) { + cpuc->tfa_shadow = val; + wrmsrl(MSR_TSX_FORCE_ABORT, val); + } +} + +static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + /* + * We're going to use PMC3, make sure TFA is set before we touch it. + */ + if (cntr == 3) + intel_set_tfa(cpuc, true); +} + +static void intel_tfa_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * If we find PMC3 is no longer used when we enable the PMU, we can + * clear TFA. + */ + if (!test_bit(3, cpuc->active_mask)) + intel_set_tfa(cpuc, false); + + intel_pmu_enable_all(added); +} + +static inline u64 intel_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void intel_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline bool event_is_checkpointed(struct perf_event *event) +{ + return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} + +static inline void intel_set_masks(struct perf_event *event, int idx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (event->attr.exclude_host) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + if (event->attr.exclude_guest) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); + if (event_is_checkpointed(event)) + __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status); +} + +static inline void intel_clear_masks(struct perf_event *event, int idx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); + __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status); +} + +static void intel_pmu_disable_fixed(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 mask; + + if (is_topdown_idx(idx)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * When there are other active TopDown events, + * don't disable the fixed counter 3. + */ + if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) + return; + idx = INTEL_PMC_IDX_FIXED_SLOTS; + } + + intel_clear_masks(event, idx); + + mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); + cpuc->fixed_ctrl_val &= ~mask; +} + +static void intel_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + switch (idx) { + case 0 ... INTEL_PMC_IDX_FIXED - 1: + intel_clear_masks(event, idx); + x86_pmu_disable_event(event); + break; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: + intel_pmu_disable_fixed(event); + break; + case INTEL_PMC_IDX_FIXED_BTS: + intel_pmu_disable_bts(); + intel_pmu_drain_bts_buffer(); + return; + case INTEL_PMC_IDX_FIXED_VLBR: + intel_clear_masks(event, idx); + break; + default: + intel_clear_masks(event, idx); + pr_warn("Failed to disable the event with invalid index %d\n", + idx); + return; + } + + /* + * Needs to be called after x86_pmu_disable_event, + * so we don't trigger the event without PEBS bit set. + */ + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_disable(event); +} + +static void intel_pmu_assign_event(struct perf_event *event, int idx) +{ + if (is_pebs_pt(event)) + perf_report_aux_output_id(event, idx); +} + +static void intel_pmu_del_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + intel_pmu_lbr_del(event); + if (event->attr.precise_ip) + intel_pmu_pebs_del(event); +} + +static int icl_set_topdown_event_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + + /* + * The values in PERF_METRICS MSR are derived from fixed counter 3. + * Software should start both registers, PERF_METRICS and fixed + * counter 3, from zero. + * Clear PERF_METRICS and Fixed counter 3 in initialization. + * After that, both MSRs will be cleared for each read. + * Don't need to clear them again. + */ + if (left == x86_pmu.max_period) { + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrl(MSR_PERF_METRICS, 0); + hwc->saved_slots = 0; + hwc->saved_metric = 0; + } + + if ((hwc->saved_slots) && is_slots_event(event)) { + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); + wrmsrl(MSR_PERF_METRICS, hwc->saved_metric); + } + + perf_event_update_userpage(event); + + return 0; +} + +static int adl_set_topdown_event_period(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type != hybrid_big) + return 0; + + return icl_set_topdown_event_period(event); +} + +DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); + +static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) +{ + u32 val; + + /* + * The metric is reported as an 8bit integer fraction + * summing up to 0xff. + * slots-in-metric = (Metric / 0xff) * slots + */ + val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff; + return mul_u64_u32_div(slots, val, 0xff); +} + +static u64 icl_get_topdown_value(struct perf_event *event, + u64 slots, u64 metrics) +{ + int idx = event->hw.idx; + u64 delta; + + if (is_metric_idx(idx)) + delta = icl_get_metrics_event_value(metrics, slots, idx); + else + delta = slots; + + return delta; +} + +static void __icl_update_topdown_event(struct perf_event *event, + u64 slots, u64 metrics, + u64 last_slots, u64 last_metrics) +{ + u64 delta, last = 0; + + delta = icl_get_topdown_value(event, slots, metrics); + if (last_slots) + last = icl_get_topdown_value(event, last_slots, last_metrics); + + /* + * The 8bit integer fraction of metric may be not accurate, + * especially when the changes is very small. + * For example, if only a few bad_spec happens, the fraction + * may be reduced from 1 to 0. If so, the bad_spec event value + * will be 0 which is definitely less than the last value. + * Avoid update event->count for this case. + */ + if (delta > last) { + delta -= last; + local64_add(delta, &event->count); + } +} + +static void update_saved_topdown_regs(struct perf_event *event, u64 slots, + u64 metrics, int metric_end) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *other; + int idx; + + event->hw.saved_slots = slots; + event->hw.saved_metric = metrics; + + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { + if (!is_topdown_idx(idx)) + continue; + other = cpuc->events[idx]; + other->hw.saved_slots = slots; + other->hw.saved_metric = metrics; + } +} + +/* + * Update all active Topdown events. + * + * The PERF_METRICS and Fixed counter 3 are read separately. The values may be + * modify by a NMI. PMU has to be disabled before calling this function. + */ + +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *other; + u64 slots, metrics; + bool reset = true; + int idx; + + /* read Fixed counter 3 */ + rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); + if (!slots) + return 0; + + /* read PERF_METRICS */ + rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { + if (!is_topdown_idx(idx)) + continue; + other = cpuc->events[idx]; + __icl_update_topdown_event(other, slots, metrics, + event ? event->hw.saved_slots : 0, + event ? event->hw.saved_metric : 0); + } + + /* + * Check and update this event, which may have been cleared + * in active_mask e.g. x86_pmu_stop() + */ + if (event && !test_bit(event->hw.idx, cpuc->active_mask)) { + __icl_update_topdown_event(event, slots, metrics, + event->hw.saved_slots, + event->hw.saved_metric); + + /* + * In x86_pmu_stop(), the event is cleared in active_mask first, + * then drain the delta, which indicates context switch for + * counting. + * Save metric and slots for context switch. + * Don't need to reset the PERF_METRICS and Fixed counter 3. + * Because the values will be restored in next schedule in. + */ + update_saved_topdown_regs(event, slots, metrics, metric_end); + reset = false; + } + + if (reset) { + /* The fixed counter 3 has to be written before the PERF_METRICS. */ + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrl(MSR_PERF_METRICS, 0); + if (event) + update_saved_topdown_regs(event, 0, 0, metric_end); + } + + return slots; +} + +static u64 icl_update_topdown_event(struct perf_event *event) +{ + return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + + x86_pmu.num_topdown_events - 1); +} + +static u64 adl_update_topdown_event(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type != hybrid_big) + return 0; + + return icl_update_topdown_event(event); +} + +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); + +static void intel_pmu_read_topdown_event(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* Only need to call update_topdown_event() once for group read. */ + if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && + !is_slots_event(event)) + return; + + perf_pmu_disable(event->pmu); + static_call(intel_pmu_update_topdown_event)(event); + perf_pmu_enable(event->pmu); +} + +static void intel_pmu_read_event(struct perf_event *event) +{ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_auto_reload_read(event); + else if (is_topdown_count(event)) + intel_pmu_read_topdown_event(event); + else + x86_perf_event_update(event); +} + +static void intel_pmu_enable_fixed(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + u64 mask, bits = 0; + int idx = hwc->idx; + + if (is_topdown_idx(idx)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + /* + * When there are other active TopDown events, + * don't enable the fixed counter 3 again. + */ + if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) + return; + + idx = INTEL_PMC_IDX_FIXED_SLOTS; + } + + intel_set_masks(event, idx); + + /* + * Enable IRQ generation (0x8), if not PEBS, + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + if (!event->attr.precise_ip) + bits |= 0x8; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + + /* + * ANY bit is supported in v3 and up + */ + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) + bits |= 0x4; + + idx -= INTEL_PMC_IDX_FIXED; + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { + bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + } + + cpuc->fixed_ctrl_val &= ~mask; + cpuc->fixed_ctrl_val |= bits; +} + +static void intel_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_enable(event); + + switch (idx) { + case 0 ... INTEL_PMC_IDX_FIXED - 1: + intel_set_masks(event, idx); + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + break; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: + intel_pmu_enable_fixed(event); + break; + case INTEL_PMC_IDX_FIXED_BTS: + if (!__this_cpu_read(cpu_hw_events.enabled)) + return; + intel_pmu_enable_bts(hwc->config); + break; + case INTEL_PMC_IDX_FIXED_VLBR: + intel_set_masks(event, idx); + break; + default: + pr_warn("Failed to enable the event with invalid index %d\n", + idx); + } +} + +static void intel_pmu_add_event(struct perf_event *event) +{ + if (event->attr.precise_ip) + intel_pmu_pebs_add(event); + if (needs_branch_stack(event)) + intel_pmu_lbr_add(event); +} + +/* + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: + */ +int intel_pmu_save_and_restart(struct perf_event *event) +{ + static_call(x86_pmu_update)(event); + /* + * For a checkpointed counter always reset back to 0. This + * avoids a situation where the counter overflows, aborts the + * transaction and is then set back to shortly before the + * overflow, and overflows and aborts again. + */ + if (unlikely(event_is_checkpointed(event))) { + /* No race with NMIs because the counter should not be armed */ + wrmsrl(event->hw.event_base, 0); + local64_set(&event->hw.prev_count, 0); + } + return static_call(x86_pmu_set_period)(event); +} + +static int intel_pmu_set_period(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_set_topdown_event_period)(event); + + return x86_perf_event_set_period(event); +} + +static u64 intel_pmu_update(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_update_topdown_event)(event); + + return x86_perf_event_update(event); +} + +static void intel_pmu_reset(void) +{ + struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + int num_counters = hybrid(cpuc->pmu, num_counters); + unsigned long flags; + int idx; + + if (!num_counters) + return; + + local_irq_save(flags); + + pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); + + for (idx = 0; idx < num_counters; idx++) { + wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); + wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); + } + for (idx = 0; idx < num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx, cpuc->pmu)) + continue; + wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } + + if (ds) + ds->bts_index = ds->bts_buffer_base; + + /* Ack all overflows and disable fixed counters */ + if (x86_pmu.version >= 2) { + intel_pmu_ack_status(intel_pmu_get_status()); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + } + + /* Reset LBRs and LBR freezing */ + if (x86_pmu.lbr_nr) { + update_debugctlmsr(get_debugctlmsr() & + ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); + } + + local_irq_restore(flags); +} + +/* + * We may be running with guest PEBS events created by KVM, and the + * PEBS records are logged into the guest's DS and invisible to host. + * + * In the case of guest PEBS overflow, we only trigger a fake event + * to emulate the PEBS overflow PMI for guest PEBS counters in KVM. + * The guest will then vm-entry and check the guest DS area to read + * the guest PEBS records. + * + * The contents and other behavior of the guest event do not matter. + */ +static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, + struct perf_sample_data *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask; + struct perf_event *event = NULL; + int bit; + + if (!unlikely(perf_guest_state())) + return; + + if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active || + !guest_pebs_idxs) + return; + + for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, + INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) { + event = cpuc->events[bit]; + if (!event->attr.precise_ip) + continue; + + perf_sample_data_init(data, 0, event->hw.last_period); + if (perf_event_overflow(event, data, regs)) + x86_pmu_stop(event, 0); + + /* Inject one fake event is enough. */ + break; + } +} + +static int handle_pmi_common(struct pt_regs *regs, u64 status) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int bit; + int handled = 0; + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); + + inc_irq_stat(apic_perf_irqs); + + /* + * Ignore a range of extra bits in status that do not indicate + * overflow by themselves. + */ + status &= ~(GLOBAL_STATUS_COND_CHG | + GLOBAL_STATUS_ASIF | + GLOBAL_STATUS_LBRS_FROZEN); + if (!status) + return 0; + /* + * In case multiple PEBS events are sampled at the same time, + * it is possible to have GLOBAL_STATUS bit 62 set indicating + * PEBS buffer overflow and also seeing at most 3 PEBS counters + * having their bits set in the status register. This is a sign + * that there was at least one PEBS record pending at the time + * of the PMU interrupt. PEBS counters must only be processed + * via the drain_pebs() calls and not via the regular sample + * processing loop coming after that the function, otherwise + * phony regular samples may be generated in the sampling buffer + * not marked with the EXACT tag. Another possibility is to have + * one PEBS event and at least one non-PEBS event which overflows + * while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will + * not be set, yet the overflow status bit for the PEBS counter will + * be on Skylake. + * + * To avoid this problem, we systematically ignore the PEBS-enabled + * counters from the GLOBAL_STATUS mask and we always process PEBS + * events via drain_pebs(). + */ + status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); + + /* + * PEBS overflow sets bit 62 in the global status register + */ + if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) { + u64 pebs_enabled = cpuc->pebs_enabled; + + handled++; + x86_pmu_handle_guest_pebs(regs, &data); + x86_pmu.drain_pebs(regs, &data); + status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + + /* + * PMI throttle may be triggered, which stops the PEBS event. + * Although cpuc->pebs_enabled is updated accordingly, the + * MSR_IA32_PEBS_ENABLE is not updated. Because the + * cpuc->enabled has been forced to 0 in PMI. + * Update the MSR if pebs_enabled is changed. + */ + if (pebs_enabled != cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + } + + /* + * Intel PT + */ + if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { + handled++; + if (!perf_guest_handle_intel_pt_intr()) + intel_pt_interrupt(); + } + + /* + * Intel Perf metrics + */ + if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { + handled++; + static_call(intel_pmu_update_topdown_event)(NULL); + } + + /* + * Checkpointed counters can lead to 'spurious' PMIs because the + * rollback caused by the PMI will have cleared the overflow status + * bit. Therefore always force probe these counters. + */ + status |= cpuc->intel_cp_status; + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (has_branch_stack(event)) { + data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + return handled; +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int intel_pmu_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); + int loops; + u64 status; + int handled; + int pmu_enabled; + + /* + * Save the PMU state. + * It needs to be restored when leaving the handler. + */ + pmu_enabled = cpuc->enabled; + /* + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. + */ + if (!late_ack && !mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); + cpuc->enabled = 0; + __intel_pmu_disable_all(true); + handled = intel_pmu_drain_bts_buffer(); + handled += intel_bts_interrupt(); + status = intel_pmu_get_status(); + if (!status) + goto done; + + loops = 0; +again: + intel_pmu_lbr_read(); + intel_pmu_ack_status(status); + if (++loops > 100) { + static bool warned; + + if (!warned) { + WARN(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + warned = true; + } + intel_pmu_reset(); + goto done; + } + + handled += handle_pmi_common(regs, status); + + /* + * Repeat if there is more work to be done: + */ + status = intel_pmu_get_status(); + if (status) + goto again; + +done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + cpuc->enabled = pmu_enabled; + if (pmu_enabled) + __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); + + /* + * Only unmask the NMI after the overflow counters + * have been reset. This avoids spurious NMIs on + * Haswell CPUs. + */ + if (late_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); + return handled; +} + +static struct event_constraint * +intel_bts_constraints(struct perf_event *event) +{ + if (unlikely(intel_pmu_has_bts(event))) + return &bts_constraint; + + return NULL; +} + +/* + * Note: matches a fake event, like Fixed2. + */ +static struct event_constraint * +intel_vlbr_constraints(struct perf_event *event) +{ + struct event_constraint *c = &vlbr_constraint; + + if (unlikely(constraint_match(c, event->hw.config))) { + event->hw.flags |= c->flags; + return c; + } + + return NULL; +} + +static int intel_alt_er(struct cpu_hw_events *cpuc, + int idx, u64 config) +{ + struct extra_reg *extra_regs = hybrid(cpuc->pmu, extra_regs); + int alt_idx = idx; + + if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) + return idx; + + if (idx == EXTRA_REG_RSP_0) + alt_idx = EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + alt_idx = EXTRA_REG_RSP_0; + + if (config & ~extra_regs[alt_idx].valid_mask) + return idx; + + return alt_idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs); + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= extra_regs[EXTRA_REG_RSP_0].event; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= extra_regs[EXTRA_REG_RSP_1].event; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; + } +} + +/* + * manage allocation of shared extra msr for certain events + * + * sharing can be: + * per-cpu: to be shared between the various events on a single PMU + * per-core: per-cpu + shared by HT threads + */ +static struct event_constraint * +__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event, + struct hw_perf_event_extra *reg) +{ + struct event_constraint *c = &emptyconstraint; + struct er_account *era; + unsigned long flags; + int idx = reg->idx; + + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) + return NULL; /* call x86_get_event_constraint() */ + +again: + era = &cpuc->shared_regs->regs[idx]; + /* + * we use spin_lock_irqsave() to avoid lockdep issues when + * passing a fake cpuc + */ + raw_spin_lock_irqsave(&era->lock, flags); + + if (!atomic_read(&era->ref) || era->config == reg->config) { + + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + + /* lock in msr value */ + era->config = reg->config; + era->reg = reg->reg; + + /* one more user */ + atomic_inc(&era->ref); + + /* + * need to call x86_get_event_constraint() + * to check if associated event has constraints + */ + c = NULL; + } else { + idx = intel_alt_er(cpuc, idx, reg->config); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } + } + raw_spin_unlock_irqrestore(&era->lock, flags); + + return c; +} + +static void +__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, + struct hw_perf_event_extra *reg) +{ + struct er_account *era; + + /* + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. + */ + if (!reg->alloc || cpuc->is_fake) + return; + + era = &cpuc->shared_regs->regs[reg->idx]; + + /* one fewer user */ + atomic_dec(&era->ref); + + /* allocate again next time */ + reg->alloc = 0; +} + +static struct event_constraint * +intel_shared_regs_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct event_constraint *c = NULL, *d; + struct hw_perf_event_extra *xreg, *breg; + + xreg = &event->hw.extra_reg; + if (xreg->idx != EXTRA_REG_NONE) { + c = __intel_shared_reg_get_constraints(cpuc, event, xreg); + if (c == &emptyconstraint) + return c; + } + breg = &event->hw.branch_reg; + if (breg->idx != EXTRA_REG_NONE) { + d = __intel_shared_reg_get_constraints(cpuc, event, breg); + if (d == &emptyconstraint) { + __intel_shared_reg_put_constraints(cpuc, xreg); + c = d; + } + } + return c; +} + +struct event_constraint * +x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints); + struct event_constraint *c; + + if (event_constraints) { + for_each_event_constraint(c, event_constraints) { + if (constraint_match(c, event->hw.config)) { + event->hw.flags |= c->flags; + return c; + } + } + } + + return &hybrid_var(cpuc->pmu, unconstrained); +} + +static struct event_constraint * +__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_vlbr_constraints(event); + if (c) + return c; + + c = intel_bts_constraints(event); + if (c) + return c; + + c = intel_shared_regs_constraints(cpuc, event); + if (c) + return c; + + c = intel_pebs_constraints(event); + if (c) + return c; + + return x86_get_event_constraints(cpuc, idx, event); +} + +static void +intel_start_scheduling(struct cpu_hw_events *cpuc) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xl; + int tid = cpuc->excl_thread_id; + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + + /* + * no exclusion needed + */ + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + xl = &excl_cntrs->states[tid]; + + xl->sched_started = true; + /* + * lock shared state until we are done scheduling + * in stop_event_scheduling() + * makes scheduling appear as a transaction + */ + raw_spin_lock(&excl_cntrs->lock); +} + +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct event_constraint *c = cpuc->event_constraint[idx]; + struct intel_excl_states *xl; + int tid = cpuc->excl_thread_id; + + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) + return; + + xl = &excl_cntrs->states[tid]; + + lockdep_assert_held(&excl_cntrs->lock); + + if (c->flags & PERF_X86_EVENT_EXCL) + xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; + else + xl->state[cntr] = INTEL_EXCL_SHARED; +} + +static void +intel_stop_scheduling(struct cpu_hw_events *cpuc) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xl; + int tid = cpuc->excl_thread_id; + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + /* + * no exclusion needed + */ + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + xl = &excl_cntrs->states[tid]; + + xl->sched_started = false; + /* + * release shared state lock (acquired in intel_start_scheduling()) + */ + raw_spin_unlock(&excl_cntrs->lock); +} + +static struct event_constraint * +dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx) +{ + WARN_ON_ONCE(!cpuc->constraint_list); + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { + struct event_constraint *cx; + + /* + * grab pre-allocated constraint entry + */ + cx = &cpuc->constraint_list[idx]; + + /* + * initialize dynamic constraint + * with static constraint + */ + *cx = *c; + + /* + * mark constraint as dynamic + */ + cx->flags |= PERF_X86_EVENT_DYNAMIC; + c = cx; + } + + return c; +} + +static struct event_constraint * +intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, + int idx, struct event_constraint *c) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xlo; + int tid = cpuc->excl_thread_id; + int is_excl, i, w; + + /* + * validating a group does not require + * enforcing cross-thread exclusion + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return c; + + /* + * no exclusion needed + */ + if (WARN_ON_ONCE(!excl_cntrs)) + return c; + + /* + * because we modify the constraint, we need + * to make a copy. Static constraints come + * from static const tables. + * + * only needed when constraint has not yet + * been cloned (marked dynamic) + */ + c = dyn_constraint(cpuc, c, idx); + + /* + * From here on, the constraint is dynamic. + * Either it was just allocated above, or it + * was allocated during a earlier invocation + * of this function + */ + + /* + * state of sibling HT + */ + xlo = &excl_cntrs->states[tid ^ 1]; + + /* + * event requires exclusive counter access + * across HT threads + */ + is_excl = c->flags & PERF_X86_EVENT_EXCL; + if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { + event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; + if (!cpuc->n_excl++) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); + } + + /* + * Modify static constraint with current dynamic + * state of thread + * + * EXCLUSIVE: sibling counter measuring exclusive event + * SHARED : sibling counter measuring non-exclusive event + * UNUSED : sibling counter unused + */ + w = c->weight; + for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { + /* + * exclusive event in sibling counter + * our corresponding counter cannot be used + * regardless of our event + */ + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) { + __clear_bit(i, c->idxmsk); + w--; + continue; + } + /* + * if measuring an exclusive event, sibling + * measuring non-exclusive, then counter cannot + * be used + */ + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) { + __clear_bit(i, c->idxmsk); + w--; + continue; + } + } + + /* + * if we return an empty mask, then switch + * back to static empty constraint to avoid + * the cost of freeing later on + */ + if (!w) + c = &emptyconstraint; + + c->weight = w; + + return c; +} + +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c1, *c2; + + c1 = cpuc->event_constraint[idx]; + + /* + * first time only + * - static constraint: no change across incremental scheduling calls + * - dynamic constraint: handled by intel_get_excl_constraints() + */ + c2 = __intel_get_event_constraints(cpuc, idx, event); + if (c1) { + WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC)); + bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); + c1->weight = c2->weight; + c2 = c1; + } + + if (cpuc->excl_cntrs) + return intel_get_excl_constraints(cpuc, event, idx, c2); + + return c2; +} + +static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + int tid = cpuc->excl_thread_id; + struct intel_excl_states *xl; + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake) + return; + + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { + hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; + if (!--cpuc->n_excl) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); + } + + /* + * If event was actually assigned, then mark the counter state as + * unused now. + */ + if (hwc->idx >= 0) { + xl = &excl_cntrs->states[tid]; + + /* + * put_constraint may be called from x86_schedule_events() + * which already has the lock held so here make locking + * conditional. + */ + if (!xl->sched_started) + raw_spin_lock(&excl_cntrs->lock); + + xl->state[hwc->idx] = INTEL_EXCL_UNUSED; + + if (!xl->sched_started) + raw_spin_unlock(&excl_cntrs->lock); + } +} + +static void +intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event_extra *reg; + + reg = &event->hw.extra_reg; + if (reg->idx != EXTRA_REG_NONE) + __intel_shared_reg_put_constraints(cpuc, reg); + + reg = &event->hw.branch_reg; + if (reg->idx != EXTRA_REG_NONE) + __intel_shared_reg_put_constraints(cpuc, reg); +} + +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + intel_put_shared_regs_event_constraints(cpuc, event); + + /* + * is PMU has exclusive counter restrictions, then + * all events are subject to and must call the + * put_excl_constraints() routine + */ + if (cpuc->excl_cntrs) + intel_put_excl_constraints(cpuc, event); +} + +static void intel_pebs_aliases_core2(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use INST_RETIRED.ANY_P + * (0x00c0), which is a PEBS capable event, to get the same + * count. + * + * INST_RETIRED.ANY_P counts the number of cycles that retires + * CNTMASK instructions. By setting CNTMASK to a value (16) + * larger than the maximum number of instructions that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less instructions, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); + + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_precdist(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use INST_RETIRED.PREC_DIST + * (0x01c0), which is a PEBS capable event, to get the same + * count. + * + * The PREC_DIST event has special support to minimize sample + * shadowing effects. One drawback is that it can be + * only programmed on counter 1, but that seems like an + * acceptable trade off. + */ + u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); + + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_ivb(struct perf_event *event) +{ + if (event->attr.precise_ip < 3) + return intel_pebs_aliases_snb(event); + return intel_pebs_aliases_precdist(event); +} + +static void intel_pebs_aliases_skl(struct perf_event *event) +{ + if (event->attr.precise_ip < 3) + return intel_pebs_aliases_core2(event); + return intel_pebs_aliases_precdist(event); +} + +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) +{ + unsigned long flags = x86_pmu.large_pebs_flags; + + if (event->attr.use_clockid) + flags &= ~PERF_SAMPLE_TIME; + if (!event->attr.exclude_kernel) + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); + return flags; +} + +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (unlikely(intel_pmu_has_bts(event))) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* BTS is not allowed for precise events. */ + if (attr->precise_ip) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + +#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \ + ((x86_pmu.num_topdown_events - 1) << 8)) + +static bool is_available_metric_event(struct perf_event *event) +{ + return is_metric_event(event) && + event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX; +} + +static inline bool is_mem_loads_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01); +} + +static inline bool is_mem_loads_aux_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82); +} + +static inline bool require_mem_loads_aux_event(struct perf_event *event) +{ + if (!(x86_pmu.flags & PMU_FL_MEM_LOADS_AUX)) + return false; + + if (is_hybrid()) + return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + + return true; +} + +static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) +{ + union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap); + + return test_bit(idx, (unsigned long *)&intel_cap->capabilities); +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); + if (ret) + return ret; + + if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { + event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; + if (!(event->attr.sample_type & + ~intel_pmu_large_pebs_flags(event))) { + event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; + event->attach_state |= PERF_ATTACH_SCHED_CB; + } + } + if (x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); + } + + if (needs_branch_stack(event)) { + ret = intel_pmu_setup_lbr_filter(event); + if (ret) + return ret; + event->attach_state |= PERF_ATTACH_SCHED_CB; + + /* + * BTS is set up earlier in this path, so don't account twice + */ + if (!unlikely(intel_pmu_has_bts(event))) { + /* disallow lbr if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + } + + if (event->attr.aux_output) { + if (!event->attr.precise_ip) + return -EINVAL; + + event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; + } + + if ((event->attr.type == PERF_TYPE_HARDWARE) || + (event->attr.type == PERF_TYPE_HW_CACHE)) + return 0; + + /* + * Config Topdown slots and metric events + * + * The slots event on Fixed Counter 3 can support sampling, + * which will be handled normally in x86_perf_event_update(). + * + * Metric events don't support sampling and require being paired + * with a slots event as group leader. When the slots event + * is used in a metrics group, it too cannot support sampling. + */ + if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { + if (event->attr.config1 || event->attr.config2) + return -EINVAL; + + /* + * The TopDown metrics events and slots event don't + * support any filters. + */ + if (event->attr.config & X86_ALL_EVENT_FLAGS) + return -EINVAL; + + if (is_available_metric_event(event)) { + struct perf_event *leader = event->group_leader; + + /* The metric events don't support sampling. */ + if (is_sampling_event(event)) + return -EINVAL; + + /* The metric events require a slots group leader. */ + if (!is_slots_event(leader)) + return -EINVAL; + + /* + * The leader/SLOTS must not be a sampling event for + * metric use; hardware requires it starts at 0 when used + * in conjunction with MSR_PERF_METRICS. + */ + if (is_sampling_event(leader)) + return -EINVAL; + + event->event_caps |= PERF_EV_CAP_SIBLING; + /* + * Only once we have a METRICs sibling do we + * need TopDown magic. + */ + leader->hw.flags |= PERF_X86_EVENT_TOPDOWN; + event->hw.flags |= PERF_X86_EVENT_TOPDOWN; + } + } + + /* + * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR + * doesn't function quite right. As a work-around it needs to always be + * co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82). + * The actual count of this second event is irrelevant it just needs + * to be active to make the first event function correctly. + * + * In a group, the auxiliary event must be in front of the load latency + * event. The rule is to simplify the implementation of the check. + * That's because perf cannot have a complete group at the moment. + */ + if (require_mem_loads_aux_event(event) && + (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) && + is_mem_loads_event(event)) { + struct perf_event *leader = event->group_leader; + struct perf_event *sibling = NULL; + + /* + * When this memload event is also the first event (no group + * exists yet), then there is no aux event before it. + */ + if (leader == event) + return -ENODATA; + + if (!is_mem_loads_aux_event(leader)) { + for_each_sibling_event(sibling, leader) { + if (is_mem_loads_aux_event(sibling)) + break; + } + if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list)) + return -ENODATA; + } + } + + if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) + return 0; + + if (x86_pmu.version < 3) + return -EINVAL; + + ret = perf_allow_cpu(&event->attr); + if (ret) + return ret; + + event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; + + return 0; +} + +/* + * Currently, the only caller of this function is the atomic_switch_perf_msrs(). + * The host perf conext helps to prepare the values of the real hardware for + * a set of msrs that need to be switched atomically in a vmx transaction. + * + * For example, the pseudocode needed to add a new msr should look like: + * + * arr[(*nr)++] = (struct perf_guest_switch_msr){ + * .msr = the hardware msr address, + * .host = the value the hardware has when it doesn't run a guest, + * .guest = the value the hardware has when it runs a guest, + * }; + * + * These values have nothing to do with the emulated values the guest sees + * when it uses {RD,WR}MSR, which should be handled by the KVM context, + * specifically in the intel_pmu_{get,set}_msr(). + */ +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data; + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); + u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable; + int global_ctrl, pebs_enable; + + /* + * In addition to obeying exclude_guest/exclude_host, remove bits being + * used for PEBS when running a guest, because PEBS writes to virtual + * addresses (not physical addresses). + */ + *nr = 0; + global_ctrl = (*nr)++; + arr[global_ctrl] = (struct perf_guest_switch_msr){ + .msr = MSR_CORE_PERF_GLOBAL_CTRL, + .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask, + .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, + }; + + if (!x86_pmu.pebs) + return arr; + + /* + * If PMU counter has PEBS enabled it is not enough to + * disable counter on a guest entry since PEBS memory + * write can overshoot guest entry and corrupt guest + * memory. Disabling PEBS solves the problem. + * + * Don't do this if the CPU already enforces it. + */ + if (x86_pmu.pebs_no_isolation) { + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled, + .guest = 0, + }; + return arr; + } + + if (!kvm_pmu || !x86_pmu.pebs_ept) + return arr; + + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_DS_AREA, + .host = (unsigned long)cpuc->ds, + .guest = kvm_pmu->ds_area, + }; + + if (x86_pmu.intel_cap.pebs_baseline) { + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_PEBS_DATA_CFG, + .host = cpuc->pebs_data_cfg, + .guest = kvm_pmu->pebs_data_cfg, + }; + } + + pebs_enable = (*nr)++; + arr[pebs_enable] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + }; + + if (arr[pebs_enable].host) { + /* Disable guest PEBS if host PEBS is enabled. */ + arr[pebs_enable].guest = 0; + } else { + /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */ + arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask; + arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask; + /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ + arr[global_ctrl].guest |= arr[pebs_enable].guest; + } + + return arr; +} + +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_event *event = cpuc->events[idx]; + + arr[idx].msr = x86_pmu_config_addr(idx); + arr[idx].host = arr[idx].guest = 0; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + arr[idx].host = arr[idx].guest = + event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; + + if (event->attr.exclude_host) + arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + else if (event->attr.exclude_guest) + arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + } + + *nr = x86_pmu.num_counters; + return arr; +} + +static void core_pmu_enable_event(struct perf_event *event) +{ + if (!event->attr.exclude_host) + x86_pmu_enable_event(event); +} + +static void core_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; + + if (!test_bit(idx, cpuc->active_mask) || + cpuc->events[idx]->attr.exclude_host) + continue; + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + } +} + +static int hsw_hw_config(struct perf_event *event) +{ + int ret = intel_pmu_hw_config(event); + + if (ret) + return ret; + if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) + return 0; + event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); + + /* + * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with + * PEBS or in ANY thread mode. Since the results are non-sensical forbid + * this combination. + */ + if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && + ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || + event->attr.precise_ip > 0)) + return -EOPNOTSUPP; + + if (event_is_checkpointed(event)) { + /* + * Sampling of checkpointed events can cause situations where + * the CPU constantly aborts because of a overflow, which is + * then checkpointed back and ignored. Forbid checkpointing + * for sampling. + * + * But still allow a long sampling period, so that perf stat + * from KVM works. + */ + if (event->attr.sample_period > 0 && + event->attr.sample_period < 0x7fffffff) + return -EOPNOTSUPP; + } + return 0; +} + +static struct event_constraint counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1); + +static struct event_constraint counter2_constraint = + EVENT_CONSTRAINT(0, 0x4, 0); + +static struct event_constraint fixed0_constraint = + FIXED_EVENT_CONSTRAINT(0x00c0, 0); + +static struct event_constraint fixed0_counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); + +static struct event_constraint * +hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_get_event_constraints(cpuc, idx, event); + + /* Handle special quirk on in_tx_checkpointed only in counter 2 */ + if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { + if (c->idxmsk64 & (1U << 2)) + return &counter2_constraint; + return &emptyconstraint; + } + + return c; +} + +static struct event_constraint * +icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + /* + * Fixed counter 0 has less skid. + * Force instruction:ppp in Fixed counter 0 + */ + if ((event->attr.precise_ip == 3) && + constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_constraint; + + return hsw_get_event_constraints(cpuc, idx, event); +} + +static struct event_constraint * +spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = icl_get_event_constraints(cpuc, idx, event); + + /* + * The :ppp indicates the Precise Distribution (PDist) facility, which + * is only supported on the GP counter 0. If a :ppp event which is not + * available on the GP counter 0, error out. + * Exception: Instruction PDIR is only available on the fixed counter 0. + */ + if ((event->attr.precise_ip == 3) && + !constraint_match(&fixed0_constraint, event->hw.config)) { + if (c->idxmsk64 & BIT_ULL(0)) + return &counter0_constraint; + + return &emptyconstraint; + } + + return c; +} + +static struct event_constraint * +glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + /* :ppp means to do reduced skid PEBS which is PMC0 only. */ + if (event->attr.precise_ip == 3) + return &counter0_constraint; + + c = intel_get_event_constraints(cpuc, idx, event); + + return c; +} + +static struct event_constraint * +tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_get_event_constraints(cpuc, idx, event); + + /* + * :ppp means to do reduced skid PEBS, + * which is available on PMC0 and fixed counter 0. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_constraint; + + return &counter0_constraint; + } + + return c; +} + +static bool allow_tsx_force_abort = true; + +static struct event_constraint * +tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event); + + /* + * Without TFA we must not use PMC3. + */ + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { + c = dyn_constraint(cpuc, c, idx); + c->idxmsk64 &= ~(1ULL << 3); + c->weight--; + } + + return c; +} + +static struct event_constraint * +adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return spr_get_event_constraints(cpuc, idx, event); + else if (pmu->cpu_type == hybrid_small) + return tnt_get_event_constraints(cpuc, idx, event); + + WARN_ON(1); + return &emptyconstraint; +} + +static int adl_hw_config(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return hsw_hw_config(event); + else if (pmu->cpu_type == hybrid_small) + return intel_pmu_hw_config(event); + + WARN_ON(1); + return -EOPNOTSUPP; +} + +static u8 adl_get_hybrid_cpu_type(void) +{ + return hybrid_big; +} + +/* + * Broadwell: + * + * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared + * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine + * the two to enforce a minimum period of 128 (the smallest value that has bits + * 0-5 cleared and >= 100). + * + * Because of how the code in x86_perf_event_set_period() works, the truncation + * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period + * to make up for the 'lost' events due to carrying the 'error' in period_left. + * + * Therefore the effective (average) period matches the requested period, + * despite coarser hardware granularity. + */ +static void bdw_limit_period(struct perf_event *event, s64 *left) +{ + if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (*left < 128) + *left = 128; + *left &= ~0x3fULL; + } +} + +static void nhm_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, 32LL); +} + +static void spr_limit_period(struct perf_event *event, s64 *left) +{ + if (event->attr.precise_ip == 3) + *left = max(*left, 128LL); +} + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); +PMU_FORMAT_ATTR(in_tx, "config:32"); +PMU_FORMAT_ATTR(in_tx_cp, "config:33"); + +static struct attribute *intel_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +ssize_t intel_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); + + return x86_event_sysfs_show(page, config, event); +} + +static struct intel_shared_regs *allocate_shared_regs(int cpu) +{ + struct intel_shared_regs *regs; + int i; + + regs = kzalloc_node(sizeof(struct intel_shared_regs), + GFP_KERNEL, cpu_to_node(cpu)); + if (regs) { + /* + * initialize the locks to keep lockdep happy + */ + for (i = 0; i < EXTRA_REG_MAX; i++) + raw_spin_lock_init(®s->regs[i].lock); + + regs->core_id = -1; + } + return regs; +} + +static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) +{ + struct intel_excl_cntrs *c; + + c = kzalloc_node(sizeof(struct intel_excl_cntrs), + GFP_KERNEL, cpu_to_node(cpu)); + if (c) { + raw_spin_lock_init(&c->lock); + c->core_id = -1; + } + return c; +} + + +int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) +{ + cpuc->pebs_record_size = x86_pmu.pebs_record_size; + + if (is_hybrid() || x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { + cpuc->shared_regs = allocate_shared_regs(cpu); + if (!cpuc->shared_regs) + goto err; + } + + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) { + size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); + + cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); + if (!cpuc->constraint_list) + goto err_shared_regs; + } + + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + cpuc->excl_cntrs = allocate_excl_cntrs(cpu); + if (!cpuc->excl_cntrs) + goto err_constraint_list; + + cpuc->excl_thread_id = 0; + } + + return 0; + +err_constraint_list: + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; + +err_shared_regs: + kfree(cpuc->shared_regs); + cpuc->shared_regs = NULL; + +err: + return -ENOMEM; +} + +static int intel_pmu_cpu_prepare(int cpu) +{ + return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); +} + +static void flip_smm_bit(void *data) +{ + unsigned long set = *(unsigned long *)data; + + if (set > 0) { + msr_set_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } else { + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } +} + +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + u8 cpu_type = get_this_hybrid_cpu_type(); + struct x86_hybrid_pmu *pmu = NULL; + int i; + + if (!cpu_type && x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { + pmu = &x86_pmu.hybrid_pmu[i]; + break; + } + } + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { + cpuc->pmu = NULL; + return false; + } + + /* Only check and dump the PMU information for the first CPU */ + if (!cpumask_empty(&pmu->supported_cpus)) + goto end; + + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) + return false; + + pr_info("%s PMU driver: ", pmu->name); + + if (pmu->intel_cap.pebs_output_pt_available) + pr_cont("PEBS-via-PT "); + + pr_cont("\n"); + + x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed, + pmu->intel_ctrl); + +end: + cpumask_set_cpu(cpu, &pmu->supported_cpus); + cpuc->pmu = &pmu->pmu; + + x86_pmu_update_cpu_context(&pmu->pmu, cpu); + + return true; +} + +static void intel_pmu_cpu_starting(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int core_id = topology_core_id(cpu); + int i; + + if (is_hybrid() && !init_hybrid_pmu(cpu)) + return; + + init_debug_store_on_cpu(cpu); + /* + * Deal with CPUs that don't clear their LBRs on power-up. + */ + intel_pmu_lbr_reset(); + + cpuc->lbr_sel = NULL; + + if (x86_pmu.flags & PMU_FL_TFA) { + WARN_ON_ONCE(cpuc->tfa_shadow); + cpuc->tfa_shadow = ~0ULL; + intel_set_tfa(cpuc, false); + } + + if (x86_pmu.version > 1) + flip_smm_bit(&x86_pmu.attr_freeze_on_smi); + + /* + * Disable perf metrics if any added CPU doesn't support it. + * + * Turn off the check for a hybrid architecture, because the + * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicate + * the architecture features. The perf metrics is a model-specific + * feature for now. The corresponding bit should always be 0 on + * a hybrid platform, e.g., Alder Lake. + */ + if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) { + union perf_capabilities perf_cap; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); + if (!perf_cap.perf_metrics) { + x86_pmu.intel_cap.perf_metrics = 0; + x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + } + } + + if (!cpuc->shared_regs) + return; + + if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { + struct intel_shared_regs *pc; + + pc = per_cpu(cpu_hw_events, i).shared_regs; + if (pc && pc->core_id == core_id) { + cpuc->kfree_on_online[0] = cpuc->shared_regs; + cpuc->shared_regs = pc; + break; + } + } + cpuc->shared_regs->core_id = core_id; + cpuc->shared_regs->refcnt++; + } + + if (x86_pmu.lbr_sel_map) + cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; + + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { + struct cpu_hw_events *sibling; + struct intel_excl_cntrs *c; + + sibling = &per_cpu(cpu_hw_events, i); + c = sibling->excl_cntrs; + if (c && c->core_id == core_id) { + cpuc->kfree_on_online[1] = cpuc->excl_cntrs; + cpuc->excl_cntrs = c; + if (!sibling->excl_thread_id) + cpuc->excl_thread_id = 1; + break; + } + } + cpuc->excl_cntrs->core_id = core_id; + cpuc->excl_cntrs->refcnt++; + } +} + +static void free_excl_cntrs(struct cpu_hw_events *cpuc) +{ + struct intel_excl_cntrs *c; + + c = cpuc->excl_cntrs; + if (c) { + if (c->core_id == -1 || --c->refcnt == 0) + kfree(c); + cpuc->excl_cntrs = NULL; + } + + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; +} + +static void intel_pmu_cpu_dying(int cpu) +{ + fini_debug_store_on_cpu(cpu); +} + +void intel_cpuc_finish(struct cpu_hw_events *cpuc) +{ + struct intel_shared_regs *pc; + + pc = cpuc->shared_regs; + if (pc) { + if (pc->core_id == -1 || --pc->refcnt == 0) + kfree(pc); + cpuc->shared_regs = NULL; + } + + free_excl_cntrs(cpuc); +} + +static void intel_pmu_cpu_dead(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + intel_cpuc_finish(cpuc); + + if (is_hybrid() && cpuc->pmu) + cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus); +} + +static void intel_pmu_sched_task(struct perf_event_context *ctx, + bool sched_in) +{ + intel_pmu_pebs_sched_task(ctx, sched_in); + intel_pmu_lbr_sched_task(ctx, sched_in); +} + +static void intel_pmu_swap_task_ctx(struct perf_event_context *prev, + struct perf_event_context *next) +{ + intel_pmu_lbr_swap_task_ctx(prev, next); +} + +static int intel_pmu_check_period(struct perf_event *event, u64 value) +{ + return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; +} + +static void intel_aux_output_init(void) +{ + /* Refer also intel_pmu_aux_output_match() */ + if (x86_pmu.intel_cap.pebs_output_pt_available) + x86_pmu.assign = intel_pmu_assign_event; +} + +static int intel_pmu_aux_output_match(struct perf_event *event) +{ + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */ + if (!x86_pmu.intel_cap.pebs_output_pt_available) + return 0; + + return is_intel_pt_event(event); +} + +static int intel_pmu_filter_match(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + + return cpumask_test_cpu(cpu, &pmu->supported_cpus); +} + +PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); + +PMU_FORMAT_ATTR(ldlat, "config1:0-15"); + +PMU_FORMAT_ATTR(frontend, "config1:0-23"); + +static struct attribute *intel_arch3_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_any.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static struct attribute *hsw_format_attr[] = { + &format_attr_in_tx.attr, + &format_attr_in_tx_cp.attr, + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + NULL +}; + +static struct attribute *nhm_format_attr[] = { + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + NULL +}; + +static struct attribute *slm_format_attr[] = { + &format_attr_offcore_rsp.attr, + NULL +}; + +static struct attribute *skl_format_attr[] = { + &format_attr_frontend.attr, + NULL, +}; + +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = core_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + .large_pebs_flags = LARGE_PEBS_FLAGS, + + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need following callbacks here to allocate + * it properly. + */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, + + .check_period = intel_pmu_check_period, + + .lbr_reset = intel_pmu_lbr_reset_64, + .lbr_read = intel_pmu_lbr_read_64, + .lbr_save = intel_pmu_lbr_save, + .lbr_restore = intel_pmu_lbr_restore, +}; + +static __initconst const struct x86_pmu intel_pmu = { + .name = "Intel", + .handle_irq = intel_pmu_handle_irq, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, + .add = intel_pmu_add_event, + .del = intel_pmu_del_event, + .read = intel_pmu_read_event, + .set_period = intel_pmu_set_period, + .update = intel_pmu_update, + .hw_config = intel_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + .large_pebs_flags = LARGE_PEBS_FLAGS, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, + + .format_attrs = intel_arch3_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, + + .guest_get_msrs = intel_guest_get_msrs, + .sched_task = intel_pmu_sched_task, + .swap_task_ctx = intel_pmu_swap_task_ctx, + + .check_period = intel_pmu_check_period, + + .aux_output_match = intel_pmu_aux_output_match, + + .lbr_reset = intel_pmu_lbr_reset_64, + .lbr_read = intel_pmu_lbr_read_64, + .lbr_save = intel_pmu_lbr_save, + .lbr_restore = intel_pmu_lbr_restore, + + /* + * SMM has access to all 4 rings and while traditionally SMM code only + * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM. + * + * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction + * between SMM or not, this results in what should be pure userspace + * counters including SMM data. + * + * This is a clear privilege issue, therefore globally disable + * counting SMM by default. + */ + .attr_freeze_on_smi = 1, +}; + +static __init void intel_clovertown_quirk(void) +{ + /* + * PEBS is unreliable due to: + * + * AJ67 - PEBS may experience CPL leaks + * AJ68 - PEBS PMI may be delayed by one event + * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] + * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS + * + * AJ67 could be worked around by restricting the OS/USR flags. + * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. + * + * AJ106 could possibly be worked around by not allowing LBR + * usage from PEBS, including the fixup. + * AJ68 could possibly be worked around by always programming + * a pebs_event_reset[0] value and coping with the lost events. + * + * But taken together it might just make sense to not enable PEBS on + * these chips. + */ + pr_warn("PEBS disabled due to CPU errata\n"); + x86_pmu.pebs = 0; + x86_pmu.pebs_constraints = NULL; +} + +static const struct x86_cpu_desc isolation_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e), + {} +}; + +static void intel_check_pebs_isolation(void) +{ + x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes); +} + +static __init void intel_pebs_isolation_quirk(void) +{ + WARN_ON_ONCE(x86_pmu.check_microcode); + x86_pmu.check_microcode = intel_check_pebs_isolation; + intel_check_pebs_isolation(); +} + +static const struct x86_cpu_desc pebs_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028), + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618), + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c), + {} +}; + +static bool intel_snb_pebs_broken(void) +{ + return !x86_cpu_has_min_microcode_rev(pebs_ucodes); +} + +static void intel_snb_check_microcode(void) +{ + if (intel_snb_pebs_broken() == x86_pmu.pebs_broken) + return; + + /* + * Serialized by the microcode lock.. + */ + if (x86_pmu.pebs_broken) { + pr_info("PEBS enabled due to microcode update\n"); + x86_pmu.pebs_broken = 0; + } else { + pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); + x86_pmu.pebs_broken = 1; + } +} + +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Disable the check for real HW, so we don't + * mess with potentially enabled registers: + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ + if (val_new != val_tmp) + return false; + + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + +static __init void intel_sandybridge_quirk(void) +{ + x86_pmu.check_microcode = intel_snb_check_microcode; + cpus_read_lock(); + intel_snb_check_microcode(); + cpus_read_unlock(); +} + +static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, + { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, +}; + +static __init void intel_arch_events_quirk(void) +{ + int bit; + + /* disable event that reported as not present by cpuid */ + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { + intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; + pr_warn("CPUID marked event: \'%s\' unavailable\n", + intel_arch_events_map[bit].name); + } +} + +static __init void intel_nehalem_quirk(void) +{ + union cpuid10_ebx ebx; + + ebx.full = x86_pmu.events_maskl; + if (ebx.split.no_branch_misses_retired) { + /* + * Erratum AAJ80 detected, we work it around by using + * the BR_MISP_EXEC.ANY event. This will over-count + * branch-misses, but it's still much better than the + * architectural event which is often completely bogus: + */ + intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; + ebx.split.no_branch_misses_retired = 0; + x86_pmu.events_maskl = ebx.full; + pr_info("CPU erratum AAJ80 worked around\n"); + } +} + +/* + * enable software workaround for errata: + * SNB: BJ122 + * IVB: BV98 + * HSW: HSD29 + * + * Only needed when HT is enabled. However detecting + * if HT is enabled is difficult (model specific). So instead, + * we enable the workaround in the early boot, and verify if + * it is needed in a later initcall phase once we have valid + * topology information to check if HT is actually enabled + */ +static __init void intel_ht_bug(void) +{ + x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; + + x86_pmu.start_scheduling = intel_start_scheduling; + x86_pmu.commit_scheduling = intel_commit_scheduling; + x86_pmu.stop_scheduling = intel_stop_scheduling; +} + +EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") + +/* Haswell special events */ +EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); +EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); +EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); +EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); +EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); +EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); +EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); +EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); + +static struct attribute *hsw_events_attrs[] = { + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), + NULL +}; + +static struct attribute *hsw_mem_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL, +}; + +static struct attribute *hsw_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_capacity), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_commit), + EVENT_PTR(el_abort), + EVENT_PTR(el_capacity), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL +}; + +EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2"); + +static struct attribute *icl_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL, +}; + +static struct attribute *icl_td_events_attrs[] = { + EVENT_PTR(slots), + EVENT_PTR(td_retiring), + EVENT_PTR(td_bad_spec), + EVENT_PTR(td_fe_bound), + EVENT_PTR(td_be_bound), + NULL, +}; + +static struct attribute *icl_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_abort), + EVENT_PTR(el_commit), + EVENT_PTR(el_capacity_read), + EVENT_PTR(el_capacity_write), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + + +EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); +EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); + +static struct attribute *spr_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_spr), + EVENT_PTR(mem_ld_aux), + NULL, +}; + +static struct attribute *spr_td_events_attrs[] = { + EVENT_PTR(slots), + EVENT_PTR(td_retiring), + EVENT_PTR(td_bad_spec), + EVENT_PTR(td_fe_bound), + EVENT_PTR(td_be_bound), + EVENT_PTR(td_heavy_ops), + EVENT_PTR(td_br_mispredict), + EVENT_PTR(td_fetch_lat), + EVENT_PTR(td_mem_bound), + NULL, +}; + +static struct attribute *spr_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + +static ssize_t freeze_on_smi_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi); +} + +static DEFINE_MUTEX(freeze_on_smi_mutex); + +static ssize_t freeze_on_smi_store(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + ssize_t ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&freeze_on_smi_mutex); + + if (x86_pmu.attr_freeze_on_smi == val) + goto done; + + x86_pmu.attr_freeze_on_smi = val; + + cpus_read_lock(); + on_each_cpu(flip_smm_bit, &val, 1); + cpus_read_unlock(); +done: + mutex_unlock(&freeze_on_smi_mutex); + + return count; +} + +static void update_tfa_sched(void *ignored) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * check if PMC3 is used + * and if so force schedule out for all event types all contexts + */ + if (test_bit(3, cpuc->active_mask)) + perf_pmu_resched(x86_get_pmu(smp_processor_id())); +} + +static ssize_t show_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", allow_tsx_force_abort); +} + +static ssize_t set_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + ssize_t ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + /* no change */ + if (val == allow_tsx_force_abort) + return count; + + allow_tsx_force_abort = val; + + cpus_read_lock(); + on_each_cpu(update_tfa_sched, NULL, 1); + cpus_read_unlock(); + + return count; +} + + +static DEVICE_ATTR_RW(freeze_on_smi); + +static ssize_t branches_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); +} + +static DEVICE_ATTR_RO(branches); + +static struct attribute *lbr_attrs[] = { + &dev_attr_branches.attr, + NULL +}; + +static char pmu_name_str[30]; + +static ssize_t pmu_name_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str); +} + +static DEVICE_ATTR_RO(pmu_name); + +static struct attribute *intel_pmu_caps_attrs[] = { + &dev_attr_pmu_name.attr, + NULL +}; + +static DEVICE_ATTR(allow_tsx_force_abort, 0644, + show_sysctl_tfa, + set_sysctl_tfa); + +static struct attribute *intel_pmu_attrs[] = { + &dev_attr_freeze_on_smi.attr, + &dev_attr_allow_tsx_force_abort.attr, + NULL, +}; + +static umode_t +tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; +} + +static umode_t +pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.pebs ? attr->mode : 0; +} + +static umode_t +lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} + +static umode_t +exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.version >= 2 ? attr->mode : 0; +} + +static umode_t +default_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + if (attr == &dev_attr_allow_tsx_force_abort.attr) + return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0; + + return attr->mode; +} + +static struct attribute_group group_events_td = { + .name = "events", +}; + +static struct attribute_group group_events_mem = { + .name = "events", + .is_visible = pebs_is_visible, +}; + +static struct attribute_group group_events_tsx = { + .name = "events", + .is_visible = tsx_is_visible, +}; + +static struct attribute_group group_caps_gen = { + .name = "caps", + .attrs = intel_pmu_caps_attrs, +}; + +static struct attribute_group group_caps_lbr = { + .name = "caps", + .attrs = lbr_attrs, + .is_visible = lbr_is_visible, +}; + +static struct attribute_group group_format_extra = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_format_extra_skl = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_default = { + .attrs = intel_pmu_attrs, + .is_visible = default_is_visible, +}; + +static const struct attribute_group *attr_update[] = { + &group_events_td, + &group_events_mem, + &group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &group_format_extra, + &group_format_extra_skl, + &group_default, + NULL, +}; + +EVENT_ATTR_STR_HYBRID(slots, slots_adl, "event=0x00,umask=0x4", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_adl, "event=0xc2,umask=0x0;event=0x00,umask=0x80", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-bad-spec, td_bad_spec_adl, "event=0x73,umask=0x0;event=0x00,umask=0x81", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_adl, "event=0x71,umask=0x0;event=0x00,umask=0x82", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_adl, "event=0x74,umask=0x0;event=0x00,umask=0x83", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-heavy-ops, td_heavy_ops_adl, "event=0x00,umask=0x84", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-br-mispredict, td_br_mis_adl, "event=0x00,umask=0x85", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-fetch-lat, td_fetch_lat_adl, "event=0x00,umask=0x86", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-mem-bound, td_mem_bound_adl, "event=0x00,umask=0x87", hybrid_big); + +static struct attribute *adl_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_adl), + EVENT_PTR(td_bad_spec_adl), + EVENT_PTR(td_fe_bound_adl), + EVENT_PTR(td_be_bound_adl), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL, +}; + +/* Must be in IDX order */ +EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(mem-loads-aux, mem_ld_aux_adl, "event=0x03,umask=0x82", hybrid_big); + +static struct attribute *adl_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_adl), + EVENT_PTR(mem_st_adl), + EVENT_PTR(mem_ld_aux_adl), + NULL, +}; + +EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-conflict, tx_conflict_adl, "event=0x54,umask=0x1", hybrid_big); +EVENT_ATTR_STR_HYBRID(cycles-t, cycles_t_adl, "event=0x3c,in_tx=1", hybrid_big); +EVENT_ATTR_STR_HYBRID(cycles-ct, cycles_ct_adl, "event=0x3c,in_tx=1,in_tx_cp=1", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-capacity-read, tx_capacity_read_adl, "event=0x54,umask=0x80", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-capacity-write, tx_capacity_write_adl, "event=0x54,umask=0x2", hybrid_big); + +static struct attribute *adl_hybrid_tsx_attrs[] = { + EVENT_PTR(tx_start_adl), + EVENT_PTR(tx_abort_adl), + EVENT_PTR(tx_commit_adl), + EVENT_PTR(tx_capacity_read_adl), + EVENT_PTR(tx_capacity_write_adl), + EVENT_PTR(tx_conflict_adl), + EVENT_PTR(cycles_t_adl), + EVENT_PTR(cycles_ct_adl), + NULL, +}; + +FORMAT_ATTR_HYBRID(in_tx, hybrid_big); +FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); +FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); +FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); +FORMAT_ATTR_HYBRID(frontend, hybrid_big); + +static struct attribute *adl_hybrid_extra_attr_rtm[] = { + FORMAT_HYBRID_PTR(in_tx), + FORMAT_HYBRID_PTR(in_tx_cp), + FORMAT_HYBRID_PTR(offcore_rsp), + FORMAT_HYBRID_PTR(ldlat), + FORMAT_HYBRID_PTR(frontend), + NULL, +}; + +static struct attribute *adl_hybrid_extra_attr[] = { + FORMAT_HYBRID_PTR(offcore_rsp), + FORMAT_HYBRID_PTR(ldlat), + FORMAT_HYBRID_PTR(frontend), + NULL, +}; + +static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + struct perf_pmu_events_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); + + return pmu->cpu_type & pmu_attr->pmu_type; +} + +static umode_t hybrid_events_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + return is_attr_for_this_pmu(kobj, attr) ? attr->mode : 0; +} + +static inline int hybrid_find_supported_cpu(struct x86_hybrid_pmu *pmu) +{ + int cpu = cpumask_first(&pmu->supported_cpus); + + return (cpu >= nr_cpu_ids) ? -1 : cpu; +} + +static umode_t hybrid_tsx_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + int cpu = hybrid_find_supported_cpu(pmu); + + return (cpu >= 0) && is_attr_for_this_pmu(kobj, attr) && cpu_has(&cpu_data(cpu), X86_FEATURE_RTM) ? attr->mode : 0; +} + +static umode_t hybrid_format_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + struct perf_pmu_format_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); + int cpu = hybrid_find_supported_cpu(pmu); + + return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; +} + +static struct attribute_group hybrid_group_events_td = { + .name = "events", + .is_visible = hybrid_events_is_visible, +}; + +static struct attribute_group hybrid_group_events_mem = { + .name = "events", + .is_visible = hybrid_events_is_visible, +}; + +static struct attribute_group hybrid_group_events_tsx = { + .name = "events", + .is_visible = hybrid_tsx_is_visible, +}; + +static struct attribute_group hybrid_group_format_extra = { + .name = "format", + .is_visible = hybrid_format_is_visible, +}; + +static ssize_t intel_hybrid_get_attr_cpus(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + return cpumap_print_to_pagebuf(true, buf, &pmu->supported_cpus); +} + +static DEVICE_ATTR(cpus, S_IRUGO, intel_hybrid_get_attr_cpus, NULL); +static struct attribute *intel_hybrid_cpus_attrs[] = { + &dev_attr_cpus.attr, + NULL, +}; + +static struct attribute_group hybrid_group_cpus = { + .attrs = intel_hybrid_cpus_attrs, +}; + +static const struct attribute_group *hybrid_attr_update[] = { + &hybrid_group_events_td, + &hybrid_group_events_mem, + &hybrid_group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &hybrid_group_format_extra, + &group_default, + &hybrid_group_cpus, + NULL, +}; + +static struct attribute *empty_attrs; + +static void intel_pmu_check_num_counters(int *num_counters, + int *num_counters_fixed, + u64 *intel_ctrl, u64 fixed_mask) +{ + if (*num_counters > INTEL_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + *num_counters, INTEL_PMC_MAX_GENERIC); + *num_counters = INTEL_PMC_MAX_GENERIC; + } + *intel_ctrl = (1ULL << *num_counters) - 1; + + if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + *num_counters_fixed, INTEL_PMC_MAX_FIXED); + *num_counters_fixed = INTEL_PMC_MAX_FIXED; + } + + *intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED; +} + +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl) +{ + struct event_constraint *c; + + if (!event_constraints) + return; + + /* + * event on fixed counter2 (REF_CYCLES) only works on this + * counter, so do not extend mask to generic counters + */ + for_each_event_constraint(c, event_constraints) { + /* + * Don't extend the topdown slots and metrics + * events to the generic counters. + */ + if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) { + /* + * Disable topdown slots and metrics events, + * if slots event is not in CPUID. + */ + if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl)) + c->idxmsk64 = 0; + c->weight = hweight64(c->idxmsk64); + continue; + } + + if (c->cmask == FIXED_EVENT_FLAGS) { + /* Disabled fixed counters which are not in CPUID */ + c->idxmsk64 &= intel_ctrl; + + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) + c->idxmsk64 |= (1ULL << num_counters) - 1; + } + c->idxmsk64 &= + ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); + } +} + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) +{ + struct extra_reg *er; + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (!extra_regs) + return; + + for (er = extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x11UL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } +} + +static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) +{ + struct x86_hybrid_pmu *pmu; + int i; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + pmu = &x86_pmu.hybrid_pmu[i]; + + intel_pmu_check_num_counters(&pmu->num_counters, + &pmu->num_counters_fixed, + &pmu->intel_ctrl, + fixed_mask); + + if (pmu->intel_cap.perf_metrics) { + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; + } + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); + } +} + +__init int intel_pmu_init(void) +{ + struct attribute **extra_skl_attr = &empty_attrs; + struct attribute **extra_attr = &empty_attrs; + struct attribute **td_attr = &empty_attrs; + struct attribute **mem_attr = &empty_attrs; + struct attribute **tsx_attr = &empty_attrs; + union cpuid10_edx edx; + union cpuid10_eax eax; + union cpuid10_ebx ebx; + unsigned int fixed_mask; + bool pmem = false; + int version, i; + char *name; + struct x86_hybrid_pmu *pmu; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + switch (boot_cpu_data.x86) { + case 0x6: + return p6_pmu_init(); + case 0xb: + return knc_pmu_init(); + case 0xf: + return p4_pmu_init(); + } + return -ENODEV; + } + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired hw_event or not. + */ + cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full); + if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) + return -ENODEV; + + version = eax.split.version_id; + if (version < 2) + x86_pmu = core_pmu; + else + x86_pmu = intel_pmu; + + x86_pmu.version = version; + x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntval_bits = eax.split.bit_width; + x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; + + x86_pmu.events_maskl = ebx.full; + x86_pmu.events_mask_len = eax.split.mask_length; + + x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + x86_pmu.pebs_capable = PEBS_COUNTER_MASK; + + /* + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events, when not running in a hypervisor: + */ + if (version > 1 && version < 5) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + + fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1; + } else if (version >= 5) + x86_pmu.num_counters_fixed = fls(fixed_mask); + + if (boot_cpu_has(X86_FEATURE_PDCM)) { + u64 capabilities; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + x86_pmu.intel_cap.capabilities = capabilities; + } + + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) { + x86_pmu.lbr_reset = intel_pmu_lbr_reset_32; + x86_pmu.lbr_read = intel_pmu_lbr_read_32; + } + + if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) + intel_pmu_arch_lbr_init(); + + intel_ds_init(); + + x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + + /* + * Install the hw-cache-events table: + */ + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_CORE_YONAH: + pr_cont("Core events, "); + name = "core"; + break; + + case INTEL_FAM6_CORE2_MEROM: + x86_add_quirk(intel_clovertown_quirk); + fallthrough; + + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + intel_pmu_lbr_init_core(); + + x86_pmu.event_constraints = intel_core2_event_constraints; + x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; + pr_cont("Core2 events, "); + name = "core2"; + break; + + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_nhm(); + + x86_pmu.event_constraints = intel_nehalem_event_constraints; + x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; + x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.extra_regs = intel_nehalem_extra_regs; + x86_pmu.limit_period = nhm_limit_period; + + mem_attr = nhm_mem_events_attrs; + + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + + intel_pmu_pebs_data_source_nhm(); + x86_add_quirk(intel_nehalem_quirk); + x86_pmu.pebs_no_tlb = 1; + extra_attr = nhm_format_attr; + + pr_cont("Nehalem events, "); + name = "nehalem"; + break; + + case INTEL_FAM6_ATOM_BONNELL: + case INTEL_FAM6_ATOM_BONNELL_MID: + case INTEL_FAM6_ATOM_SALTWELL: + case INTEL_FAM6_ATOM_SALTWELL_MID: + case INTEL_FAM6_ATOM_SALTWELL_TABLET: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + intel_pmu_lbr_init_atom(); + + x86_pmu.event_constraints = intel_gen_event_constraints; + x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_core2; + pr_cont("Atom events, "); + name = "bonnell"; + break; + + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_D: + case INTEL_FAM6_ATOM_SILVERMONT_MID: + case INTEL_FAM6_ATOM_AIRMONT: + case INTEL_FAM6_ATOM_AIRMONT_MID: + memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_slm(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; + x86_pmu.extra_regs = intel_slm_extra_regs; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + td_attr = slm_events_attrs; + extra_attr = slm_format_attr; + pr_cont("Silvermont events, "); + name = "silvermont"; + break; + + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_GOLDMONT_D: + memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; + x86_pmu.extra_regs = intel_glm_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + * :pp is identical to :ppp + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + td_attr = glm_events_attrs; + extra_attr = slm_format_attr; + pr_cont("Goldmont events, "); + name = "goldmont"; + break; + + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.extra_regs = intel_glm_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.pebs_capable = ~0ULL; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.get_event_constraints = glp_get_event_constraints; + td_attr = glm_events_attrs; + /* Goldmont Plus has 4-wide pipeline */ + event_attr_td_total_slots_scale_glm.event_str = "4"; + extra_attr = slm_format_attr; + pr_cont("Goldmont plus events, "); + name = "goldmont_plus"; + break; + + case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: + case INTEL_FAM6_ATOM_TREMONT_L: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.extra_regs = intel_tnt_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + td_attr = tnt_events_attrs; + extra_attr = slm_format_attr; + pr_cont("Tremont events, "); + name = "Tremont"; + break; + + case INTEL_FAM6_ALDERLAKE_N: + x86_pmu.mid_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + x86_pmu.extra_regs = intel_grt_extra_regs; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + intel_pmu_pebs_data_source_grt(); + x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + x86_pmu.limit_period = spr_limit_period; + td_attr = tnt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = nhm_format_attr; + pr_cont("Gracemont events, "); + name = "gracemont"; + break; + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_nhm(); + + x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; + x86_pmu.extra_regs = intel_westmere_extra_regs; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + + mem_attr = nhm_mem_events_attrs; + + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + + intel_pmu_pebs_data_source_nhm(); + extra_attr = nhm_format_attr; + pr_cont("Westmere events, "); + name = "westmere"; + break; + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + x86_add_quirk(intel_sandybridge_quirk); + x86_add_quirk(intel_ht_bug); + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; + + + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + td_attr = snb_events_attrs; + mem_attr = snb_mem_events_attrs; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); + + extra_attr = nhm_format_attr; + + pr_cont("SandyBridge events, "); + name = "sandybridge"; + break; + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + x86_add_quirk(intel_ht_bug); + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_ivb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + td_attr = snb_events_attrs; + mem_attr = snb_mem_events_attrs; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + extra_attr = nhm_format_attr; + + pr_cont("IvyBridge events, "); + name = "ivybridge"; + break; + + + case INTEL_FAM6_HASWELL: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_L: + case INTEL_FAM6_HASWELL_G: + x86_add_quirk(intel_ht_bug); + x86_add_quirk(intel_pebs_isolation_quirk); + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_hsw(); + + x86_pmu.event_constraints = intel_hsw_event_constraints; + x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.extra_regs = intel_snbep_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.lbr_double_abort = true; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; + mem_attr = hsw_mem_events_attrs; + tsx_attr = hsw_tsx_events_attrs; + pr_cont("Haswell events, "); + name = "haswell"; + break; + + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: + case INTEL_FAM6_BROADWELL_X: + x86_add_quirk(intel_pebs_isolation_quirk); + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ + hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | + BDW_L3_MISS|HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| + HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| + BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| + BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; + + intel_pmu_lbr_init_hsw(); + + x86_pmu.event_constraints = intel_bdw_event_constraints; + x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; + x86_pmu.extra_regs = intel_snbep_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = bdw_limit_period; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; + mem_attr = hsw_mem_events_attrs; + tsx_attr = hsw_tsx_events_attrs; + pr_cont("Broadwell events, "); + name = "broadwell"; + break; + + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + memcpy(hw_cache_event_ids, + slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, + knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + intel_pmu_lbr_init_knl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; + x86_pmu.extra_regs = intel_knl_extra_regs; + + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + extra_attr = slm_format_attr; + pr_cont("Knights Landing/Mill events, "); + name = "knights-landing"; + break; + + case INTEL_FAM6_SKYLAKE_X: + pmem = true; + fallthrough; + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: + x86_add_quirk(intel_pebs_isolation_quirk); + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + intel_pmu_lbr_init_skl(); + + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + + x86_pmu.event_constraints = intel_skl_event_constraints; + x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; + x86_pmu.extra_regs = intel_skl_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_skl; + x86_pmu.pebs_prec_dist = true; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_skl_attr = skl_format_attr; + td_attr = hsw_events_attrs; + mem_attr = hsw_mem_events_attrs; + tsx_attr = hsw_tsx_events_attrs; + intel_pmu_pebs_data_source_skl(pmem); + + /* + * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default. + * TSX force abort hooks are not required on these systems. Only deploy + * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT. + */ + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) && + !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { + x86_pmu.flags |= PMU_FL_TFA; + x86_pmu.get_event_constraints = tfa_get_event_constraints; + x86_pmu.enable_all = intel_tfa_pmu_enable_all; + x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; + } + + pr_cont("Skylake events, "); + name = "skylake"; + break; + + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + x86_pmu.pebs_ept = 1; + pmem = true; + fallthrough; + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_icl_event_constraints; + x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints; + x86_pmu.extra_regs = intel_icl_extra_regs; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = icl_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_skl_attr = skl_format_attr; + mem_attr = icl_events_attrs; + td_attr = icl_td_events_attrs; + tsx_attr = icl_tsx_events_attrs; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 4; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + pr_cont("Icelake events, "); + name = "icelake"; + break; + + case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: + pmem = true; + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + x86_pmu.event_constraints = intel_spr_event_constraints; + x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.limit_period = spr_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = spr_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_skl_attr = skl_format_attr; + mem_attr = spr_events_attrs; + td_attr = spr_td_events_attrs; + tsx_attr = spr_tsx_events_attrs; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + break; + + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: + case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: + /* + * Alder Lake has 2 types of CPU, core and atom. + * + * Initialize the common PerfMon capabilities here. + */ + x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + static_branch_enable(&perf_is_hybrid); + x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_adl(); + x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &adl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &adl_set_topdown_event_period); + + x86_pmu.filter_match = intel_pmu_filter_match; + x86_pmu.get_event_constraints = adl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + x86_pmu.limit_period = spr_limit_period; + x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; + /* + * The rtm_abort_event is used to check whether to enable GPRs + * for the RTM abort event. Atom doesn't have the RTM abort + * event. There is no harmful to set it in the common + * x86_pmu.rtm_abort_event. + */ + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + + td_attr = adl_hybrid_events_attrs; + mem_attr = adl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + adl_hybrid_extra_attr_rtm : adl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + pmu->name = "cpu_core"; + pmu->cpu_type = hybrid_big; + pmu->late_ack = true; + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + pmu->num_counters = x86_pmu.num_counters + 2; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; + } else { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + + /* + * Quirk: For some Alder Lake machine, when all E-cores are disabled in + * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However, + * the X86_FEATURE_HYBRID_CPU is still set. The above codes will + * mistakenly add extra counters for P-cores. Correct the number of + * counters here. + */ + if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + + memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_spr_event_constraints; + pmu->pebs_constraints = intel_spr_pebs_event_constraints; + pmu->extra_regs = intel_spr_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + pmu->name = "cpu_atom"; + pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = x86_pmu.max_pebs_events; + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + pmu->intel_cap.perf_metrics = 0; + pmu->intel_cap.pebs_output_pt_available = 1; + + memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + pmu->event_constraints = intel_slm_event_constraints; + pmu->pebs_constraints = intel_grt_pebs_event_constraints; + pmu->extra_regs = intel_grt_extra_regs; + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + default: + switch (x86_pmu.version) { + case 1: + x86_pmu.event_constraints = intel_v1_event_constraints; + pr_cont("generic architected perfmon v1, "); + name = "generic_arch_v1"; + break; + case 2: + case 3: + case 4: + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); + name = "generic_arch_v2+"; + break; + default: + /* + * The default constraints for v5 and up can support up to + * 16 fixed counters. For the fixed counters 4 and later, + * the pseudo-encoding is applied. + * The constraints may be cut according to the CPUID enumeration + * by inserting the EVENT_CONSTRAINT_END. + */ + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; + x86_pmu.event_constraints = intel_v5_gen_event_constraints; + pr_cont("generic architected perfmon, "); + name = "generic_arch_v5+"; + break; + } + } + + snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); + + if (!is_hybrid()) { + group_events_td.attrs = td_attr; + group_events_mem.attrs = mem_attr; + group_events_tsx.attrs = tsx_attr; + group_format_extra.attrs = extra_attr; + group_format_extra_skl.attrs = extra_skl_attr; + + x86_pmu.attr_update = attr_update; + } else { + hybrid_group_events_td.attrs = td_attr; + hybrid_group_events_mem.attrs = mem_attr; + hybrid_group_events_tsx.attrs = tsx_attr; + hybrid_group_format_extra.attrs = extra_attr; + + x86_pmu.attr_update = hybrid_attr_update; + } + + intel_pmu_check_num_counters(&x86_pmu.num_counters, + &x86_pmu.num_counters_fixed, + &x86_pmu.intel_ctrl, + (u64)fixed_mask); + + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + + intel_pmu_check_event_constraints(x86_pmu.event_constraints, + x86_pmu.num_counters, + x86_pmu.num_counters_fixed, + x86_pmu.intel_ctrl); + /* + * Access LBR MSR may cause #GP under certain circumstances. + * Check all LBR MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + if (x86_pmu.lbr_nr) { + intel_pmu_lbr_init(); + + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + + /* only support branch_stack snapshot for perfmon >= v2 */ + if (x86_pmu.disable_all == intel_pmu_disable_all) { + if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_arch_branch_stack); + } else { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_branch_stack); + } + } + } + + intel_pmu_check_extra_regs(x86_pmu.extra_regs); + + /* Support full width counters using alternative MSR range */ + if (x86_pmu.intel_cap.full_width_write) { + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; + x86_pmu.perfctr = MSR_IA32_PMC0; + pr_cont("full-width counters, "); + } + + if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) + x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + + if (is_hybrid()) + intel_pmu_check_hybrid_pmus((u64)fixed_mask); + + intel_aux_output_init(); + + return 0; +} + +/* + * HT bug: phase 2 init + * Called once we have valid topology information to check + * whether or not HT is enabled + * If HT is off, then we disable the workaround + */ +static __init int fixup_ht_bug(void) +{ + int c; + /* + * problem not present on this CPU model, nothing to do + */ + if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) + return 0; + + if (topology_max_smt_threads() > 1) { + pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); + return 0; + } + + cpus_read_lock(); + + hardlockup_detector_perf_stop(); + + x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); + + x86_pmu.start_scheduling = NULL; + x86_pmu.commit_scheduling = NULL; + x86_pmu.stop_scheduling = NULL; + + hardlockup_detector_perf_restart(); + + for_each_online_cpu(c) + free_excl_cntrs(&per_cpu(cpu_hw_events, c)); + + cpus_read_unlock(); + pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); + return 0; +} +subsys_initcall(fixup_ht_bug) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c new file mode 100644 index 000000000..551741e79 --- /dev/null +++ b/arch/x86/events/intel/cstate.c @@ -0,0 +1,793 @@ +/* + * Support cstate residency counters + * + * Copyright (C) 2015, Intel Corp. + * Author: Kan Liang (kan.liang@intel.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + */ + +/* + * This file export cstate related free running (read-only) counters + * for perf. These counters may be use simultaneously by other tools, + * such as turbostat. However, it still make sense to implement them + * in perf. Because we can conveniently collect them together with + * other events, and allow to use them from tools without special MSR + * access code. + * + * The events only support system-wide mode counting. There is no + * sampling support because it is not supported by the hardware. + * + * According to counters' scope and category, two PMUs are registered + * with the perf_event core subsystem. + * - 'cstate_core': The counter is available for each physical core. + * The counters include CORE_C*_RESIDENCY. + * - 'cstate_pkg': The counter is available for each physical package. + * The counters include PKG_C*_RESIDENCY. + * + * All of these counters are specified in the Intel® 64 and IA-32 + * Architectures Software Developer.s Manual Vol3b. + * + * Model specific counters: + * MSR_CORE_C1_RES: CORE C1 Residency Counter + * perf code: 0x00 + * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL + * Scope: Core (each processor core has a MSR) + * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, + * CNL,KBL,CML,TNT + * Scope: Core + * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, + * TGL,TNT,RKL,ADL,RPL,SPR,MTL + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 + * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, + * ICL,TGL,RKL,ADL,RPL,MTL + * Scope: Core + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, + * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, + * RPL,SPR,MTL + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, + * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, + * ADL,RPL,MTL + * Scope: Package (physical package) + * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, + * TGL,TNT,RKL,ADL,RPL,SPR,MTL + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. + * perf code: 0x03 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL + * Scope: Package (physical package) + * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. + * perf code: 0x04 + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, + * ADL,RPL,MTL + * Scope: Package (physical package) + * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. + * perf code: 0x05 + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, + * ADL,RPL,MTL + * Scope: Package (physical package) + * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. + * perf code: 0x06 + * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, + * TNT,RKL,ADL,RPL,MTL + * Scope: Package (physical package) + * + */ + +#include +#include +#include +#include +#include +#include +#include "../perf_event.h" +#include "../probe.h" + +MODULE_LICENSE("GPL"); + +#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __cstate_##_var##_show, NULL) + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf); + +/* Model -> events mapping */ +struct cstate_model { + unsigned long core_events; + unsigned long pkg_events; + unsigned long quirks; +}; + +/* Quirk flags */ +#define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) + +struct perf_cstate_msr { + u64 msr; + struct perf_pmu_events_attr *attr; +}; + + +/* cstate_core PMU */ +static struct pmu cstate_core_pmu; +static bool has_cstate_core; + +enum perf_cstate_core_events { + PERF_CSTATE_CORE_C1_RES = 0, + PERF_CSTATE_CORE_C3_RES, + PERF_CSTATE_CORE_C6_RES, + PERF_CSTATE_CORE_C7_RES, + + PERF_CSTATE_CORE_EVENT_MAX, +}; + +PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03"); + +static unsigned long core_msr_mask; + +PMU_EVENT_GROUP(events, cstate_core_c1); +PMU_EVENT_GROUP(events, cstate_core_c3); +PMU_EVENT_GROUP(events, cstate_core_c6); +PMU_EVENT_GROUP(events, cstate_core_c7); + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr }, +}; + +static struct attribute *attrs_empty[] = { + NULL, +}; + +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ +static struct attribute_group core_events_attr_group = { + .name = "events", + .attrs = attrs_empty, +}; + +DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); +static struct attribute *core_format_attrs[] = { + &format_attr_core_event.attr, + NULL, +}; + +static struct attribute_group core_format_attr_group = { + .name = "format", + .attrs = core_format_attrs, +}; + +static cpumask_t cstate_core_cpu_mask; +static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); + +static struct attribute *cstate_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cstate_cpumask_attrs, +}; + +static const struct attribute_group *core_attr_groups[] = { + &core_events_attr_group, + &core_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +/* cstate_pkg PMU */ +static struct pmu cstate_pkg_pmu; +static bool has_cstate_pkg; + +enum perf_cstate_pkg_events { + PERF_CSTATE_PKG_C2_RES = 0, + PERF_CSTATE_PKG_C3_RES, + PERF_CSTATE_PKG_C6_RES, + PERF_CSTATE_PKG_C7_RES, + PERF_CSTATE_PKG_C8_RES, + PERF_CSTATE_PKG_C9_RES, + PERF_CSTATE_PKG_C10_RES, + + PERF_CSTATE_PKG_EVENT_MAX, +}; + +PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06"); + +static unsigned long pkg_msr_mask; + +PMU_EVENT_GROUP(events, cstate_pkg_c2); +PMU_EVENT_GROUP(events, cstate_pkg_c3); +PMU_EVENT_GROUP(events, cstate_pkg_c6); +PMU_EVENT_GROUP(events, cstate_pkg_c7); +PMU_EVENT_GROUP(events, cstate_pkg_c8); +PMU_EVENT_GROUP(events, cstate_pkg_c9); +PMU_EVENT_GROUP(events, cstate_pkg_c10); + +static struct perf_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, +}; + +static struct attribute_group pkg_events_attr_group = { + .name = "events", + .attrs = attrs_empty, +}; + +DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); +static struct attribute *pkg_format_attrs[] = { + &format_attr_pkg_event.attr, + NULL, +}; +static struct attribute_group pkg_format_attr_group = { + .name = "format", + .attrs = pkg_format_attrs, +}; + +static cpumask_t cstate_pkg_cpu_mask; + +static const struct attribute_group *pkg_attr_groups[] = { + &pkg_events_attr_group, + &pkg_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu == &cstate_core_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); + else if (pmu == &cstate_pkg_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); + else + return 0; +} + +static int cstate_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config; + int cpu; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->pmu == &cstate_core_pmu) { + if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) + return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX); + if (!(core_msr_mask & (1 << cfg))) + return -EINVAL; + event->hw.event_base = core_msr[cfg].msr; + cpu = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(event->cpu)); + } else if (event->pmu == &cstate_pkg_pmu) { + if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) + return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); + if (!(pkg_msr_mask & (1 << cfg))) + return -EINVAL; + event->hw.event_base = pkg_msr[cfg].msr; + cpu = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_die_cpumask(event->cpu)); + } else { + return -ENOENT; + } + + if (cpu >= nr_cpu_ids) + return -ENODEV; + + event->cpu = cpu; + event->hw.config = cfg; + event->hw.idx = -1; + return 0; +} + +static inline u64 cstate_pmu_read_counter(struct perf_event *event) +{ + u64 val; + + rdmsrl(event->hw.event_base, val); + return val; +} + +static void cstate_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = cstate_pmu_read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + local64_add(new_raw_count - prev_raw_count, &event->count); +} + +static void cstate_pmu_event_start(struct perf_event *event, int mode) +{ + local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event)); +} + +static void cstate_pmu_event_stop(struct perf_event *event, int mode) +{ + cstate_pmu_event_update(event); +} + +static void cstate_pmu_event_del(struct perf_event *event, int mode) +{ + cstate_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int cstate_pmu_event_add(struct perf_event *event, int mode) +{ + if (mode & PERF_EF_START) + cstate_pmu_event_start(event, mode); + + return 0; +} + +/* + * Check if exiting cpu is the designated reader. If so migrate the + * events when there is a valid target available + */ +static int cstate_cpu_exit(unsigned int cpu) +{ + unsigned int target; + + if (has_cstate_core && + cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { + + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &cstate_core_cpu_mask); + perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); + } + } + + if (has_cstate_pkg && + cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { + + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &cstate_pkg_cpu_mask); + perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); + } + } + return 0; +} + +static int cstate_cpu_init(unsigned int cpu) +{ + unsigned int target; + + /* + * If this is the first online thread of that core, set it in + * the core cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(cpu)); + + if (has_cstate_core && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_core_cpu_mask); + + /* + * If this is the first online thread of that package, set it + * in the package cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_die_cpumask(cpu)); + if (has_cstate_pkg && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); + + return 0; +} + +static const struct attribute_group *core_attr_update[] = { + &group_cstate_core_c1, + &group_cstate_core_c3, + &group_cstate_core_c6, + &group_cstate_core_c7, + NULL, +}; + +static const struct attribute_group *pkg_attr_update[] = { + &group_cstate_pkg_c2, + &group_cstate_pkg_c3, + &group_cstate_pkg_c6, + &group_cstate_pkg_c7, + &group_cstate_pkg_c8, + &group_cstate_pkg_c9, + &group_cstate_pkg_c10, + NULL, +}; + +static struct pmu cstate_core_pmu = { + .attr_groups = core_attr_groups, + .attr_update = core_attr_update, + .name = "cstate_core", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, +}; + +static struct pmu cstate_pkg_pmu = { + .attr_groups = pkg_attr_groups, + .attr_update = pkg_attr_update, + .name = "cstate_pkg", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, +}; + +static const struct cstate_model nhm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model snb_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model hswult_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model cnl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model icl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model icx_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), +}; + +static const struct cstate_model adl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model slm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = SLM_PKG_C6_USE_C7_MSR, +}; + + +static const struct cstate_model knl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, +}; + + +static const struct cstate_model glm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + + +static const struct x86_cpu_id intel_cstates_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), + { }, +}; +MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); + +static int __init cstate_probe(const struct cstate_model *cm) +{ + /* SLM has different MSR for PKG C6 */ + if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) + pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + + core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX, + true, (void *) &cm->core_events); + + pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, + true, (void *) &cm->pkg_events); + + has_cstate_core = !!core_msr_mask; + has_cstate_pkg = !!pkg_msr_mask; + + return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; +} + +static inline void cstate_cleanup(void) +{ + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); + + if (has_cstate_core) + perf_pmu_unregister(&cstate_core_pmu); + + if (has_cstate_pkg) + perf_pmu_unregister(&cstate_pkg_pmu); +} + +static int __init cstate_init(void) +{ + int err; + + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, + "perf/x86/cstate:starting", cstate_cpu_init, NULL); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, + "perf/x86/cstate:online", NULL, cstate_cpu_exit); + + if (has_cstate_core) { + err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); + if (err) { + has_cstate_core = false; + pr_info("Failed to register cstate core pmu\n"); + cstate_cleanup(); + return err; + } + } + + if (has_cstate_pkg) { + if (topology_max_die_per_package() > 1) { + err = perf_pmu_register(&cstate_pkg_pmu, + "cstate_die", -1); + } else { + err = perf_pmu_register(&cstate_pkg_pmu, + cstate_pkg_pmu.name, -1); + } + if (err) { + has_cstate_pkg = false; + pr_info("Failed to register cstate pkg pmu\n"); + cstate_cleanup(); + return err; + } + } + return 0; +} + +static int __init cstate_pmu_init(void) +{ + const struct x86_cpu_id *id; + int err; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(intel_cstates_match); + if (!id) + return -ENODEV; + + err = cstate_probe((const struct cstate_model *) id->driver_data); + if (err) + return err; + + return cstate_init(); +} +module_init(cstate_pmu_init); + +static void __exit cstate_pmu_exit(void) +{ + cstate_cleanup(); +} +module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c new file mode 100644 index 000000000..3ff38e740 --- /dev/null +++ b/arch/x86/events/intel/ds.c @@ -0,0 +1,2369 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../perf_event.h" + +/* Waste a full page so it can be mapped into the cpu_entry_area */ +DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +#define PEBS_FIXUP_SIZE PAGE_SIZE + +/* + * pebs_record_32 for p4 and core not supported + +struct pebs_record_32 { + u32 flags, ip; + u32 ax, bc, cx, dx; + u32 si, di, bp, sp; +}; + + */ + +union intel_x86_pebs_dse { + u64 val; + struct { + unsigned int ld_dse:4; + unsigned int ld_stlb_miss:1; + unsigned int ld_locked:1; + unsigned int ld_data_blk:1; + unsigned int ld_addr_blk:1; + unsigned int ld_reserved:24; + }; + struct { + unsigned int st_l1d_hit:1; + unsigned int st_reserved1:3; + unsigned int st_stlb_miss:1; + unsigned int st_locked:1; + unsigned int st_reserved2:26; + }; + struct { + unsigned int st_lat_dse:4; + unsigned int st_lat_stlb_miss:1; + unsigned int st_lat_locked:1; + unsigned int ld_reserved3:26; + }; +}; + + +/* + * Map PEBS Load Latency Data Source encodings to generic + * memory data source information + */ +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +/* Version for Sandy Bridge and later */ +static u64 pebs_data_source[] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */ +}; + +/* Patch up minor differences in the bits */ +void __init intel_pmu_pebs_data_source_nhm(void) +{ + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); +} + +static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source) +{ + u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); + + data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); + data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_skl(bool pmem) +{ + __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); +} + +static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source) +{ + data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); +} + +void __init intel_pmu_pebs_data_source_grt(void) +{ + __intel_pmu_pebs_data_source_grt(pebs_data_source); +} + +void __init intel_pmu_pebs_data_source_adl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_grt(data_source); +} + +static u64 precise_store_data(u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2); + + dse.val = status; + + /* + * bit 4: TLB access + * 1 = stored missed 2nd level TLB + * + * so it either hit the walker or the OS + * otherwise hit 2nd level TLB + */ + if (dse.st_stlb_miss) + val |= P(TLB, MISS); + else + val |= P(TLB, HIT); + + /* + * bit 0: hit L1 data cache + * if not set, then all we know is that + * it missed L1D + */ + if (dse.st_l1d_hit) + val |= P(LVL, HIT); + else + val |= P(LVL, MISS); + + /* + * bit 5: Locked prefix + */ + if (dse.st_locked) + val |= P(LOCK, LOCKED); + + return val; +} + +static u64 precise_datala_hsw(struct perf_event *event, u64 status) +{ + union perf_mem_data_src dse; + + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; + + /* + * L1 info only valid for following events: + * + * MEM_UOPS_RETIRED.STLB_MISS_STORES + * MEM_UOPS_RETIRED.LOCK_STORES + * MEM_UOPS_RETIRED.SPLIT_STORES + * MEM_UOPS_RETIRED.ALL_STORES + */ + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) { + if (status & 1) + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + else + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + } + return dse.val; +} + +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Retrieve the latency data for e-core of ADL */ +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + + WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + + dse.val = status; + + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + + /* + * For the atom core on ADL, + * bit 4: lock, bit 5: TLB access. + */ + pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + + if (dse.ld_data_blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +static u64 load_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + + dse.val = status; + + /* + * use the mapping table for bit 0-3 + */ + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + + /* + * Nehalem models do not support TLB, Lock infos + */ + if (x86_pmu.pebs_no_tlb) { + val |= P(TLB, NA) | P(LOCK, NA); + return val; + } + + pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked); + + /* + * Ice Lake and earlier models do not support block infos. + */ + if (!x86_pmu.pebs_block) { + val |= P(BLK, NA); + return val; + } + /* + * bit 6: load was blocked since its data could not be forwarded + * from a preceding store + */ + if (dse.ld_data_blk) + val |= P(BLK, DATA); + + /* + * bit 7: load was blocked due to potential address conflict with + * a preceding store + */ + if (dse.ld_addr_blk) + val |= P(BLK, ADDR); + + if (!dse.ld_data_blk && !dse.ld_addr_blk) + val |= P(BLK, NA); + + return val; +} + +static u64 store_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; + + dse.val = status; + + /* + * use the mapping table for bit 0-3 + */ + val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse]; + + pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked); + + val |= P(BLK, NA); + + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; +} + +struct pebs_record_core { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; +}; + +struct pebs_record_nhm { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; +}; + +/* + * Same as pebs_record_nhm, with two additional fields. + */ +struct pebs_record_hsw { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; + u64 real_ip, tsx_tuning; +}; + +union hsw_tsx_tuning { + struct { + u32 cycles_last_block : 32, + hle_abort : 1, + rtm_abort : 1, + instruction_abort : 1, + non_instruction_abort : 1, + retry : 1, + data_conflict : 1, + capacity_writes : 1, + capacity_reads : 1; + }; + u64 value; +}; + +#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL + +/* Same as HSW, plus TSC */ + +struct pebs_record_skl { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; + u64 real_ip, tsx_tuning; + u64 tsc; +}; + +void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static DEFINE_PER_CPU(void *, insn_buffer); + +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) +{ + unsigned long start = (unsigned long)cea; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); + + /* + * This is a cross-CPU update of the cpu_entry_area, we must shoot down + * all TLB entries for it. + */ + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + unsigned long start = (unsigned long)cea; + size_t msz = 0; + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); + + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); + int node = cpu_to_node(cpu); + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *insn_buff, *cea; + + if (!x86_pmu.pebs) + return 0; + + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); + if (unlikely(!buffer)) + return -ENOMEM; + + /* + * HSW+ already provides us the eventing ip; no need to allocate this + * buffer then. + */ + if (x86_pmu.intel_cap.pebs_format < 2) { + insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!insn_buff) { + dsfree_pages(buffer, bsiz); + return -ENOMEM; + } + per_cpu(insn_buffer, cpu) = insn_buff; + } + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); + ds->pebs_index = ds->pebs_buffer_base; + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; + return 0; +} + +static void release_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + void *cea; + + if (!x86_pmu.pebs) + return; + + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; + + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; +} + +static int alloc_bts_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; + + if (!x86_pmu.bts) + return 0; + + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); + return -ENOMEM; + } + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); + ds->bts_index = ds->bts_buffer_base; + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + (max / 16) * BTS_RECORD_SIZE; + return 0; +} + +static void release_bts_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + void *cea; + + if (!x86_pmu.bts) + return; + + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; +} + +static int alloc_ds_buffer(int cpu) +{ + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + + memset(ds, 0, sizeof(*ds)); + per_cpu(cpu_hw_events, cpu).ds = ds; + return 0; +} + +static void release_ds_buffer(int cpu) +{ + per_cpu(cpu_hw_events, cpu).ds = NULL; +} + +void release_ds_buffers(void) +{ + int cpu; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + for_each_possible_cpu(cpu) + release_ds_buffer(cpu); + + for_each_possible_cpu(cpu) { + /* + * Again, ignore errors from offline CPUs, they will no longer + * observe cpu_hw_events.ds and not program the DS_AREA when + * they come up. + */ + fini_debug_store_on_cpu(cpu); + } + + for_each_possible_cpu(cpu) { + release_pebs_buffer(cpu); + release_bts_buffer(cpu); + } +} + +void reserve_ds_buffers(void) +{ + int bts_err = 0, pebs_err = 0; + int cpu; + + x86_pmu.bts_active = 0; + x86_pmu.pebs_active = 0; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + if (!x86_pmu.bts) + bts_err = 1; + + if (!x86_pmu.pebs) + pebs_err = 1; + + for_each_possible_cpu(cpu) { + if (alloc_ds_buffer(cpu)) { + bts_err = 1; + pebs_err = 1; + } + + if (!bts_err && alloc_bts_buffer(cpu)) + bts_err = 1; + + if (!pebs_err && alloc_pebs_buffer(cpu)) + pebs_err = 1; + + if (bts_err && pebs_err) + break; + } + + if (bts_err) { + for_each_possible_cpu(cpu) + release_bts_buffer(cpu); + } + + if (pebs_err) { + for_each_possible_cpu(cpu) + release_pebs_buffer(cpu); + } + + if (bts_err && pebs_err) { + for_each_possible_cpu(cpu) + release_ds_buffer(cpu); + } else { + if (x86_pmu.bts && !bts_err) + x86_pmu.bts_active = 1; + + if (x86_pmu.pebs && !pebs_err) + x86_pmu.pebs_active = 1; + + for_each_possible_cpu(cpu) { + /* + * Ignores wrmsr_on_cpu() errors for offline CPUs they + * will get this call through intel_pmu_cpu_starting(). + */ + init_debug_store_on_cpu(cpu); + } + } +} + +/* + * BTS + */ + +struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0); + +void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= DEBUGCTLMSR_TR; + debugctlmsr |= DEBUGCTLMSR_BTS; + if (config & ARCH_PERFMON_EVENTSEL_INT) + debugctlmsr |= DEBUGCTLMSR_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | + DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +int intel_pmu_drain_bts_buffer(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *base, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + unsigned long skip = 0; + struct pt_regs regs; + + if (!event) + return 0; + + if (!x86_pmu.bts_active) + return 0; + + base = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= base) + return 0; + + memset(®s, 0, sizeof(regs)); + + ds->bts_index = ds->bts_buffer_base; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + /* + * BTS leaks kernel addresses in branches across the cpl boundary, + * such as traps or system calls, so unless the user is asking for + * kernel tracing (and right now it's not possible), we'd need to + * filter them out. But first we need to count how many of those we + * have in the current batch. This is an extra O(n) pass, however, + * it's much faster than the other one especially considering that + * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the + * alloc_bts_buffer()). + */ + for (at = base; at < top; at++) { + /* + * Note that right now *this* BTS code only works if + * attr::exclude_kernel is set, but let's keep this extra + * check here in case that changes. + */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + skip++; + } + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + rcu_read_lock(); + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) + goto unlock; + + for (at = base; at < top; at++) { + /* Filter out any records that contain kernel addresses. */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + continue; + + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. */ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +unlock: + rcu_read_unlock(); + return 1; +} + +static inline void intel_pmu_drain_pebs_buffer(void) +{ + struct perf_sample_data data; + + x86_pmu.drain_pebs(NULL, &data); +} + +/* + * PEBS + */ +struct event_constraint intel_core2_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_atom_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_slm_pebs_event_constraints[] = { + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_glm_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_grt_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_nehalem_pebs_event_constraints[] = { + INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_westmere_pebs_event_constraints[] = { + INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_snb_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_hsw_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_bdw_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + + +struct event_constraint intel_skl_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), + /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), + INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_icl_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ + + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_spr_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), + INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), + INTEL_PSD_CONSTRAINT(0x2cd, 0x1), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + +struct event_constraint *intel_pebs_constraints(struct perf_event *event) +{ + struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints); + struct event_constraint *c; + + if (!event->attr.precise_ip) + return NULL; + + if (pebs_constraints) { + for_each_event_constraint(c, pebs_constraints) { + if (constraint_match(c, event->hw.config)) { + event->hw.flags |= c->flags; + return c; + } + } + } + + /* + * Extended PEBS support + * Makes the PEBS code search the normal constraints. + */ + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + return NULL; + + return &emptyconstraint; +} + +/* + * We need the sched_task callback even for per-cpu events when we use + * the large interrupt threshold, such that we can provide PID and TID + * to PEBS samples. + */ +static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) +{ + if (cpuc->n_pebs == cpuc->n_pebs_via_pt) + return false; + + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); +} + +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!sched_in && pebs_needs_sched_cb(cpuc)) + intel_pmu_drain_pebs_buffer(); +} + +static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + u64 threshold; + int reserved; + + if (cpuc->n_pebs_via_pt) + return; + + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + reserved = max_pebs_events + num_counters_fixed; + else + reserved = max_pebs_events; + + if (cpuc->n_pebs == cpuc->n_large_pebs) { + threshold = ds->pebs_absolute_maximum - + reserved * cpuc->pebs_record_size; + } else { + threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; + } + + ds->pebs_interrupt_threshold = threshold; +} + +static void adaptive_pebs_record_size_update(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = cpuc->pebs_data_cfg; + int sz = sizeof(struct pebs_basic); + + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + sz += sizeof(struct pebs_meminfo); + if (pebs_data_cfg & PEBS_DATACFG_GP) + sz += sizeof(struct pebs_gprs); + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + sz += sizeof(struct pebs_xmm); + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); + + cpuc->pebs_record_size = sz; +} + +#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_DATA_PAGE_SIZE) + +static u64 pebs_update_adaptive_cfg(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u64 sample_type = attr->sample_type; + u64 pebs_data_cfg = 0; + bool gprs, tsx_weight; + + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && + attr->precise_ip > 1) + return pebs_data_cfg; + + if (sample_type & PERF_PEBS_MEMINFO_TYPE) + pebs_data_cfg |= PEBS_DATACFG_MEMINFO; + + /* + * We need GPRs when: + * + user requested them + * + precise_ip < 2 for the non event IP + * + For RTM TSX weight we need GPRs for the abort code. + */ + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS); + + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && + ((attr->config & INTEL_ARCH_EVENT_MASK) == + x86_pmu.rtm_abort_event); + + if (gprs || (attr->precise_ip < 2) || tsx_weight) + pebs_data_cfg |= PEBS_DATACFG_GP; + + if ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) + pebs_data_cfg |= PEBS_DATACFG_XMMS; + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + /* + * For now always log all LBRs. Could configure this + * later. + */ + pebs_data_cfg |= PEBS_DATACFG_LBRS | + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); + } + + return pebs_data_cfg; +} + +static void +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct perf_event *event, bool add) +{ + struct pmu *pmu = event->ctx->pmu; + /* + * Make sure we get updated with the first PEBS + * event. It will trigger also during removal, but + * that does not hurt: + */ + bool update = cpuc->n_pebs == 1; + + if (needed_cb != pebs_needs_sched_cb(cpuc)) { + if (!needed_cb) + perf_sched_cb_inc(pmu); + else + perf_sched_cb_dec(pmu); + + update = true; + } + + /* + * The PEBS record doesn't shrink on pmu::del(). Doing so would require + * iterating all remaining PEBS events to reconstruct the config. + */ + if (x86_pmu.intel_cap.pebs_baseline && add) { + u64 pebs_data_cfg; + + /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ + if (cpuc->n_pebs == 1) { + cpuc->pebs_data_cfg = 0; + cpuc->pebs_record_size = sizeof(struct pebs_basic); + } + + pebs_data_cfg = pebs_update_adaptive_cfg(event); + + /* Update pebs_record_size if new event requires more data. */ + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { + cpuc->pebs_data_cfg |= pebs_data_cfg; + adaptive_pebs_record_size_update(); + update = true; + } + } + + if (update) + pebs_update_threshold(cpuc); +} + +void intel_pmu_pebs_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); + + cpuc->n_pebs++; + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) + cpuc->n_large_pebs++; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt++; + + pebs_update_state(needed_cb, cpuc, event, true); +} + +static void intel_pmu_pebs_via_pt_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!is_pebs_pt(event)) + return; + + if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK)) + cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK; +} + +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; + u64 value = ds->pebs_event_reset[hwc->idx]; + u32 base = MSR_RELOAD_PMC0; + unsigned int idx = hwc->idx; + + if (!is_pebs_pt(event)) + return; + + if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) + cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD; + + cpuc->pebs_enabled |= PEBS_OUTPUT_PT; + + if (hwc->idx >= INTEL_PMC_IDX_FIXED) { + base = MSR_RELOAD_FIXED_CTR0; + idx = hwc->idx - INTEL_PMC_IDX_FIXED; + if (x86_pmu.intel_cap.pebs_format < 5) + value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx]; + else + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + } + wrmsrl(base + idx, value); +} + +void intel_pmu_pebs_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; + unsigned int idx = hwc->idx; + + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + + cpuc->pebs_enabled |= 1ULL << hwc->idx; + + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) + cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled |= 1ULL << 63; + + if (x86_pmu.intel_cap.pebs_baseline) { + hwc->config |= ICL_EVENTSEL_ADAPTIVE; + if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { + wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); + cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; + } + } + + if (idx >= INTEL_PMC_IDX_FIXED) { + if (x86_pmu.intel_cap.pebs_format < 5) + idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED); + else + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + } + + /* + * Use auto-reload if possible to save a MSR write in the PMI. + * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. + */ + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + ds->pebs_event_reset[idx] = + (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; + } else { + ds->pebs_event_reset[idx] = 0; + } + + intel_pmu_pebs_via_pt_enable(event); +} + +void intel_pmu_pebs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); + + cpuc->n_pebs--; + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) + cpuc->n_large_pebs--; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt--; + + pebs_update_state(needed_cb, cpuc, event, false); +} + +void intel_pmu_pebs_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (cpuc->n_pebs == cpuc->n_large_pebs && + cpuc->n_pebs != cpuc->n_pebs_via_pt) + intel_pmu_drain_pebs_buffer(); + + cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && + (x86_pmu.version < 5)) + cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled &= ~(1ULL << 63); + + intel_pmu_pebs_via_pt_disable(event); + + if (cpuc->enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; +} + +void intel_pmu_pebs_enable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); +} + +void intel_pmu_pebs_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (cpuc->pebs_enabled) + __intel_pmu_pebs_disable_all(); +} + +static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + unsigned long from = cpuc->lbr_entries[0].from; + unsigned long old_to, to = cpuc->lbr_entries[0].to; + unsigned long ip = regs->ip; + int is_64bit = 0; + void *kaddr; + int size; + + /* + * We don't need to fixup if the PEBS assist is fault like + */ + if (!x86_pmu.intel_cap.pebs_trap) + return 1; + + /* + * No LBR entry, no basic block, no rewinding + */ + if (!cpuc->lbr_stack.nr || !from || !to) + return 0; + + /* + * Basic blocks should never cross user/kernel boundaries + */ + if (kernel_ip(ip) != kernel_ip(to)) + return 0; + + /* + * unsigned math, either ip is before the start (impossible) or + * the basic block is larger than 1 page (sanity) + */ + if ((ip - to) > PEBS_FIXUP_SIZE) + return 0; + + /* + * We sampled a branch insn, rewind using the LBR stack + */ + if (ip == to) { + set_linear_ip(regs, from); + return 1; + } + + size = ip - to; + if (!kernel_ip(ip)) { + int bytes; + u8 *buf = this_cpu_read(insn_buffer); + + /* 'size' must fit our buffer, see above */ + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != 0) + return 0; + + kaddr = buf; + } else { + kaddr = (void *)to; + } + + do { + struct insn insn; + + old_to = to; + +#ifdef CONFIG_X86_64 + is_64bit = kernel_ip(to) || any_64bit_mode(regs); +#endif + insn_init(&insn, kaddr, size, is_64bit); + + /* + * Make sure there was not a problem decoding the instruction. + * This is doubly important because we have an infinite loop if + * insn.length=0. + */ + if (insn_get_length(&insn)) + break; + + to += insn.length; + kaddr += insn.length; + size -= insn.length; + } while (to < ip); + + if (to == ip) { + set_linear_ip(regs, old_to); + return 1; + } + + /* + * Even though we decoded the basic block, the instruction stream + * never matched the given IP, either the TO or the IP got corrupted. + */ + return 0; +} + +static inline u64 intel_get_tsx_weight(u64 tsx_tuning) +{ + if (tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = tsx_tuning }; + return tsx.cycles_last_block; + } + return 0; +} + +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) +{ + u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + + /* For RTM XABORTs also log the abort code from AX */ + if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + return txn; +} + +static inline u64 get_pebs_status(void *n) +{ + if (x86_pmu.intel_cap.pebs_format < 4) + return ((struct pebs_record_nhm *)n)->status; + return ((struct pebs_basic *)n)->applicable_counters; +} + +#define PERF_X86_EVENT_PEBS_HSW_PREC \ + (PERF_X86_EVENT_PEBS_ST_HSW | \ + PERF_X86_EVENT_PEBS_LD_HSW | \ + PERF_X86_EVENT_PEBS_NA_HSW) + +static u64 get_data_src(struct perf_event *event, u64 aux) +{ + u64 val = PERF_MEM_NA; + int fl = event->hw.flags; + bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + + if (fl & PERF_X86_EVENT_PEBS_LDLAT) + val = load_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_STLAT) + val = store_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID) + val = x86_pmu.pebs_latency_data(event, aux); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, aux); + else if (fst) + val = precise_store_data(aux); + return val; +} + +static void setup_pebs_time(struct perf_event *event, + struct perf_sample_data *data, + u64 tsc) +{ + /* Converting to a user-defined clock is not supported yet. */ + if (event->attr.use_clockid != 0) + return; + + /* + * Doesn't support the conversion when the TSC is unstable. + * The TSC unstable case is a corner case and very unlikely to + * happen. If it happens, the TSC in a PEBS record will be + * dropped and fall back to perf_event_clock(). + */ + if (!using_native_sched_clock() || !sched_clock_stable()) + return; + + data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset; + data->sample_flags |= PERF_SAMPLE_TIME; +} + +#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_DATA_PAGE_SIZE) + +static void setup_pebs_fixed_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + /* + * We cast to the biggest pebs_record but are careful not to + * unconditionally access the 'extra' entries. + */ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct pebs_record_skl *pebs = __pebs; + u64 sample_type; + int fll; + + if (pebs == NULL) + return; + + sample_type = event->attr.sample_type; + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; + + perf_sample_data_init(data, 0, event->hw.last_period); + + data->period = event->hw.last_period; + + /* + * Use latency for weight (only avail with PEBS-LL) + */ + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) { + data->weight.full = pebs->lat; + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + /* + * data.data_src encodes the data source + */ + if (sample_type & PERF_SAMPLE_DATA_SRC) { + data->data_src.val = get_data_src(event, pebs->dse); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } + + /* + * We use the interrupt regs as a base because the PEBS record does not + * contain a full regs set, specifically it seems to lack segment + * descriptors, which get used by things like user_mode(). + * + * In the simple case fix up only the IP for PERF_SAMPLE_IP. + */ + *regs = *iregs; + + /* + * Initialize regs_>flags from PEBS, + * Clear exact bit (which uses x86 EFLAGS Reserved bit 3), + * i.e., do not rely on it being zero: + */ + regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT; + + if (sample_type & PERF_SAMPLE_REGS_INTR) { + regs->ax = pebs->ax; + regs->bx = pebs->bx; + regs->cx = pebs->cx; + regs->dx = pebs->dx; + regs->si = pebs->si; + regs->di = pebs->di; + + regs->bp = pebs->bp; + regs->sp = pebs->sp; + +#ifndef CONFIG_X86_32 + regs->r8 = pebs->r8; + regs->r9 = pebs->r9; + regs->r10 = pebs->r10; + regs->r11 = pebs->r11; + regs->r12 = pebs->r12; + regs->r13 = pebs->r13; + regs->r14 = pebs->r14; + regs->r15 = pebs->r15; +#endif + } + + if (event->attr.precise_ip > 1) { + /* + * Haswell and later processors have an 'eventing IP' + * (real IP) which fixes the off-by-1 skid in hardware. + * Use it when precise_ip >= 2 : + */ + if (x86_pmu.intel_cap.pebs_format >= 2) { + set_linear_ip(regs, pebs->real_ip); + regs->flags |= PERF_EFLAGS_EXACT; + } else { + /* Otherwise, use PEBS off-by-1 IP: */ + set_linear_ip(regs, pebs->ip); + + /* + * With precise_ip >= 2, try to fix up the off-by-1 IP + * using the LBR. If successful, the fixup function + * corrects regs->ip and calls set_linear_ip() on regs: + */ + if (intel_pmu_pebs_fixup_ip(regs)) + regs->flags |= PERF_EFLAGS_EXACT; + } + } else { + /* + * When precise_ip == 1, return the PEBS off-by-1 IP, + * no fixup attempted: + */ + set_linear_ip(regs, pebs->ip); + } + + + if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && + x86_pmu.intel_cap.pebs_format >= 1) { + data->addr = pebs->dla; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (x86_pmu.intel_cap.pebs_format >= 2) { + /* Only set the TSX weight when no memory weight. */ + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) { + data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + if (sample_type & PERF_SAMPLE_TRANSACTION) { + data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, + pebs->ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } + } + + /* + * v3 supplies an accurate time stamp, so we use that + * for the time stamp. + * + * We can only do this for the default trace clock. + */ + if (x86_pmu.intel_cap.pebs_format >= 3) + setup_pebs_time(event, data, pebs->tsc); + + if (has_branch_stack(event)) { + data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } +} + +static void adaptive_pebs_save_regs(struct pt_regs *regs, + struct pebs_gprs *gprs) +{ + regs->ax = gprs->ax; + regs->bx = gprs->bx; + regs->cx = gprs->cx; + regs->dx = gprs->dx; + regs->si = gprs->si; + regs->di = gprs->di; + regs->bp = gprs->bp; + regs->sp = gprs->sp; +#ifndef CONFIG_X86_32 + regs->r8 = gprs->r8; + regs->r9 = gprs->r9; + regs->r10 = gprs->r10; + regs->r11 = gprs->r11; + regs->r12 = gprs->r12; + regs->r13 = gprs->r13; + regs->r14 = gprs->r14; + regs->r15 = gprs->r15; +#endif +} + +#define PEBS_LATENCY_MASK 0xffff +#define PEBS_CACHE_LATENCY_OFFSET 32 + +/* + * With adaptive PEBS the layout depends on what fields are configured. + */ + +static void setup_pebs_adaptive_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct pebs_basic *basic = __pebs; + void *next_record = basic + 1; + u64 sample_type; + u64 format_size; + struct pebs_meminfo *meminfo = NULL; + struct pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + + if (basic == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + sample_type = event->attr.sample_type; + format_size = basic->format_size; + perf_sample_data_init(data, 0, event->hw.last_period); + data->period = event->hw.last_period; + + setup_pebs_time(event, data, basic->tsc); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } + + *regs = *iregs; + /* The ip in basic is EventingIP */ + set_linear_ip(regs, basic->ip); + regs->flags = PERF_EFLAGS_EXACT; + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. + */ + if (format_size & PEBS_DATACFG_MEMINFO) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (format_size & PEBS_DATACFG_GP) { + gprs = next_record; + next_record = gprs + 1; + + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR) + adaptive_pebs_save_regs(regs, gprs); + } + + if (format_size & PEBS_DATACFG_MEMINFO) { + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + u64 weight = meminfo->latency; + + if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { + data->weight.var2_w = weight & PEBS_LATENCY_MASK; + weight >>= PEBS_CACHE_LATENCY_OFFSET; + } + + /* + * Although meminfo::latency is defined as a u64, + * only the lower 32 bits include the valid data + * in practice on Ice Lake and earlier platforms. + */ + if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = weight ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } else { + data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + data->data_src.val = get_data_src(event, meminfo->aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + if (sample_type & PERF_SAMPLE_ADDR_TYPE) { + data->addr = meminfo->address; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (sample_type & PERF_SAMPLE_TRANSACTION) { + data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, + gprs ? gprs->ax : 0); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } + } + + if (format_size & PEBS_DATACFG_XMMS) { + struct pebs_xmm *xmm = next_record; + + next_record = xmm + 1; + perf_regs->xmm_regs = xmm->xmm; + } + + if (format_size & PEBS_DATACFG_LBRS) { + struct lbr_entry *lbr = next_record; + int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) + & 0xff) + 1; + next_record = next_record + num_lbr * sizeof(struct lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + } + + WARN_ONCE(next_record != __pebs + (format_size >> 48), + "PEBS record size %llu, expected %llu, config %llx\n", + format_size >> 48, + (u64)(next_record - __pebs), + basic->format_size); +} + +static inline void * +get_next_pebs_record_by_bit(void *base, void *top, int bit) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + void *at; + u64 pebs_status; + + /* + * fmt0 does not have a status bitfield (does not use + * perf_record_nhm format) + */ + if (x86_pmu.intel_cap.pebs_format < 1) + return base; + + if (base == NULL) + return NULL; + + for (at = base; at < top; at += cpuc->pebs_record_size) { + unsigned long status = get_pebs_status(at); + + if (test_bit(bit, (unsigned long *)&status)) { + /* PEBS v3 has accurate status bits */ + if (x86_pmu.intel_cap.pebs_format >= 3) + return at; + + if (status == (1 << bit)) + return at; + + /* clear non-PEBS bit and re-check */ + pebs_status = status & cpuc->pebs_enabled; + pebs_status &= PEBS_COUNTER_MASK; + if (pebs_status == (1 << bit)) + return at; + } + } + return NULL; +} + +void intel_pmu_auto_reload_read(struct perf_event *event) +{ + WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); + + perf_pmu_disable(event->pmu); + intel_pmu_drain_pebs_buffer(); + perf_pmu_enable(event->pmu); +} + +/* + * Special variant of intel_pmu_save_and_restart() for auto-reload. + */ +static int +intel_pmu_save_and_restart_reload(struct perf_event *event, int count) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - x86_pmu.cntval_bits; + u64 period = hwc->sample_period; + u64 prev_raw_count, new_raw_count; + s64 new, old; + + WARN_ON(!period); + + /* + * drain_pebs() only happens when the PMU is disabled. + */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + + prev_raw_count = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + local64_set(&hwc->prev_count, new_raw_count); + + /* + * Since the counter increments a negative counter value and + * overflows on the sign switch, giving the interval: + * + * [-period, 0] + * + * the difference between two consecutive reads is: + * + * A) value2 - value1; + * when no overflows have happened in between, + * + * B) (0 - value1) + (value2 - (-period)); + * when one overflow happened in between, + * + * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period)); + * when @n overflows happened in between. + * + * Here A) is the obvious difference, B) is the extension to the + * discrete interval, where the first term is to the top of the + * interval and the second term is from the bottom of the next + * interval and C) the extension to multiple intervals, where the + * middle term is the whole intervals covered. + * + * An equivalent of C, by reduction, is: + * + * value2 - value1 + n * period + */ + new = ((s64)(new_raw_count << shift) >> shift); + old = ((s64)(prev_raw_count << shift) >> shift); + local64_add(new - old + count * period, &event->count); + + local64_set(&hwc->period_left, -new); + + perf_event_update_userpage(event); + + return 0; +} + +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; + void *at = get_next_pebs_record_by_bit(base, top, bit); + static struct pt_regs dummy_iregs; + + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } else if (!intel_pmu_save_and_restart(event)) + return; + + if (!iregs) + iregs = &dummy_iregs; + + while (count > 1) { + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); + at += cpuc->pebs_record_size; + at = get_next_pebs_record_by_bit(at, top, bit); + count--; + } + + setup_sample(event, iregs, at, data, regs); + if (iregs == &dummy_iregs) { + /* + * The PEBS records may be drained in the non-overflow context, + * e.g., large PEBS + context switch. Perf should treat the + * last record the same as other PEBS records, and doesn't + * invoke the generic overflow handler. + */ + perf_event_output(event, data, regs); + } else { + /* + * All but the last records are processed. + * The last one is left to be able to call the overflow handler. + */ + if (perf_event_overflow(event, data, regs)) + x86_pmu_stop(event, 0); + } +} + +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event = cpuc->events[0]; /* PMC0 only */ + struct pebs_record_core *at, *top; + int n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; + + /* + * Whatever else happens, drain the thing + */ + ds->pebs_index = ds->pebs_buffer_base; + + if (!test_bit(0, cpuc->active_mask)) + return; + + WARN_ON_ONCE(!event); + + if (!event->attr.precise_ip) + return; + + n = top - at; + if (n <= 0) { + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + return; + } + + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, + setup_pebs_fixed_sample_data); +} + +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +{ + struct perf_event *event; + int bit; + + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. + */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } +} + +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event; + void *base, *at, *top; + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + int bit, i, size; + u64 mask; + + if (!x86_pmu.pebs_active) + return; + + base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + mask = (1ULL << x86_pmu.max_pebs_events) - 1; + size = x86_pmu.max_pebs_events; + if (x86_pmu.flags & PMU_FL_PEBS_ALL) { + mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED; + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + } + + if (unlikely(base >= top)) { + intel_pmu_pebs_event_update_no_drain(cpuc, size); + return; + } + + for (at = base; at < top; at += x86_pmu.pebs_record_size) { + struct pebs_record_nhm *p = at; + u64 pebs_status; + + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= mask; + + /* PEBS v3 has more accurate status bits */ + if (x86_pmu.intel_cap.pebs_format >= 3) { + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) + counts[bit]++; + + continue; + } + + /* + * On some CPUs the PEBS status can be zero when PEBS is + * racing with clearing of GLOBAL_STATUS. + * + * Normally we would drop that record, but in the + * case when there is only a single active PEBS event + * we can assume it's for that event. + */ + if (!pebs_status && cpuc->pebs_enabled && + !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) + pebs_status = p->status = cpuc->pebs_enabled; + + bit = find_first_bit((unsigned long *)&pebs_status, + x86_pmu.max_pebs_events); + if (bit >= x86_pmu.max_pebs_events) + continue; + + /* + * The PEBS hardware does not deal well with the situation + * when events happen near to each other and multiple bits + * are set. But it should happen rarely. + * + * If these events include one PEBS and multiple non-PEBS + * events, it doesn't impact PEBS record. The record will + * be handled normally. (slow path) + * + * If these events include two or more PEBS events, the + * records for the events can be collapsed into a single + * one, and it's not possible to reconstruct all events + * that caused the PEBS record. It's called collision. + * If collision happened, the record will be dropped. + */ + if (pebs_status != (1ULL << bit)) { + for_each_set_bit(i, (unsigned long *)&pebs_status, size) + error[i]++; + continue; + } + + counts[bit]++; + } + + for_each_set_bit(bit, (unsigned long *)&mask, size) { + if ((counts[bit] == 0) && (error[bit] == 0)) + continue; + + event = cpuc->events[bit]; + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + /* log dropped samples number */ + if (error[bit]) { + perf_log_lost_samples(event, error[bit]); + + if (iregs && perf_event_account_interrupt(event)) + x86_pmu_stop(event, 0); + } + + if (counts[bit]) { + __intel_pmu_pebs_event(event, iregs, data, base, + top, bit, counts[bit], + setup_pebs_fixed_sample_data); + } + } +} + +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + struct debug_store *ds = cpuc->ds; + struct perf_event *event; + void *base, *at, *top; + int bit, size; + u64 mask; + + if (!x86_pmu.pebs_active) + return; + + base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_basic *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + mask = ((1ULL << max_pebs_events) - 1) | + (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); + size = INTEL_PMC_IDX_FIXED + num_counters_fixed; + + if (unlikely(base >= top)) { + intel_pmu_pebs_event_update_no_drain(cpuc, size); + return; + } + + for (at = base; at < top; at += cpuc->pebs_record_size) { + u64 pebs_status; + + pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; + pebs_status &= mask; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) + counts[bit]++; + } + + for_each_set_bit(bit, (unsigned long *)&mask, size) { + if (counts[bit] == 0) + continue; + + event = cpuc->events[bit]; + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + __intel_pmu_pebs_event(event, iregs, data, base, + top, bit, counts[bit], + setup_pebs_adaptive_sample_data); + } +} + +/* + * BTS, PEBS probe and setup + */ + +void __init intel_ds_init(void) +{ + /* + * No support for 32bit formats + */ + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; + if (x86_pmu.version <= 4) + x86_pmu.pebs_no_isolation = 1; + + if (x86_pmu.pebs) { + char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; + char *pebs_qual = ""; + int format = x86_pmu.intel_cap.pebs_format; + + if (format < 4) + x86_pmu.intel_cap.pebs_baseline = 0; + + switch (format) { + case 0: + pr_cont("PEBS fmt0%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + /* + * Using >PAGE_SIZE buffers makes the WRMSR to + * PERF_GLOBAL_CTRL in intel_pmu_enable_all() + * mysteriously hang on Core2. + * + * As a workaround, we don't do this. + */ + x86_pmu.pebs_buffer_size = PAGE_SIZE; + x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; + break; + + case 1: + pr_cont("PEBS fmt1%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; + break; + + case 2: + pr_cont("PEBS fmt2%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; + break; + + case 3: + pr_cont("PEBS fmt3%c, ", pebs_type); + x86_pmu.pebs_record_size = + sizeof(struct pebs_record_skl); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; + break; + + case 4: + case 5: + x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; + x86_pmu.pebs_record_size = sizeof(struct pebs_basic); + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= + PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_TIME; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; + pebs_qual = "-baseline"; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + } else { + /* Only basic record supported */ + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_ADDR | + PERF_SAMPLE_TIME | + PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_TRANSACTION | + PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_REGS_INTR); + } + pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + + if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { + pr_cont("PEBS-via-PT, "); + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + } + + break; + + default: + pr_cont("no PEBS fmt%d%c, ", format, pebs_type); + x86_pmu.pebs = 0; + } + } +} + +void perf_restore_debug_store(void) +{ + struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); +} diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c new file mode 100644 index 000000000..618001c20 --- /dev/null +++ b/arch/x86/events/intel/knc.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Driver for Intel Xeon Phi "Knights Corner" PMU */ + +#include +#include + +#include + +#include "../perf_event.h" + +static const u64 knc_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, +}; + +static const u64 __initconst knc_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + /* On Xeon Phi event "0" is a valid DATA_READ */ + /* (L1 Data Cache Reads) Instruction. */ + /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ + /* bit will always be set in x86_pmu_hw_config(). */ + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ + [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ + [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + /* see note on L1 OP_READ */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ + [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + + +static u64 knc_pmu_event_map(int hw_event) +{ + return knc_perfmon_event_map[hw_event]; +} + +static struct event_constraint knc_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ + INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ + INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ + INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ + INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ + INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ + INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ + INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ + INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ + INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ + INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ + INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ + INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ + INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ + INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ + EVENT_CONSTRAINT_END +}; + +#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d +#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e +#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f + +#define KNC_ENABLE_COUNTER0 0x00000001 +#define KNC_ENABLE_COUNTER1 0x00000002 + +static void knc_pmu_disable_all(void) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static void knc_pmu_enable_all(int added) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static inline void +knc_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static void knc_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static inline u64 knc_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void knc_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); +} + +static int knc_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int handled = 0; + int bit, loops; + u64 status; + + cpuc = this_cpu_ptr(&cpu_hw_events); + + knc_pmu_disable_all(); + + status = knc_pmu_get_status(); + if (!status) { + knc_pmu_enable_all(0); + return handled; + } + + loops = 0; +again: + knc_pmu_ack_status(status); + if (++loops > 100) { + WARN_ONCE(1, "perf: irq loop stuck!\n"); + perf_event_print_debug(); + goto done; + } + + inc_irq_stat(apic_perf_irqs); + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * Repeat if there is more work to be done: + */ + status = knc_pmu_get_status(); + if (status) + goto again; + +done: + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + knc_pmu_enable_all(0); + + return handled; +} + + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_knc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static const struct x86_pmu knc_pmu __initconst = { + .name = "knc", + .handle_irq = knc_pmu_handle_irq, + .disable_all = knc_pmu_disable_all, + .enable_all = knc_pmu_enable_all, + .enable = knc_pmu_enable_event, + .disable = knc_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_KNC_EVNTSEL0, + .perfctr = MSR_KNC_PERFCTR0, + .event_map = knc_pmu_event_map, + .max_events = ARRAY_SIZE(knc_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 39) - 1, + .version = 0, + .num_counters = 2, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = knc_event_constraints, + .format_attrs = intel_knc_formats_attr, +}; + +__init int knc_pmu_init(void) +{ + x86_pmu = knc_pmu; + + memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c new file mode 100644 index 000000000..4dbde69c4 --- /dev/null +++ b/arch/x86/events/intel/lbr.c @@ -0,0 +1,1620 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include +#include + +#include "../perf_event.h" + +/* + * Intel LBR_SELECT bits + * Intel Vol3a, April 2011, Section 16.7 Table 16-10 + * + * Hardware branch filter (not available on all CPUs) + */ +#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ +#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ +#define LBR_JCC_BIT 2 /* do not capture conditional branches */ +#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ +#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ +#define LBR_RETURN_BIT 5 /* do not capture near returns */ +#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ +#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ +#define LBR_FAR_BIT 8 /* do not capture far branches */ +#define LBR_CALL_STACK_BIT 9 /* enable call stack */ + +/* + * Following bit only exists in Linux; we mask it out before writing it to + * the actual MSR. But it helps the constraint perf code to understand + * that this is a separate configuration. + */ +#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */ + +#define LBR_KERNEL (1 << LBR_KERNEL_BIT) +#define LBR_USER (1 << LBR_USER_BIT) +#define LBR_JCC (1 << LBR_JCC_BIT) +#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) +#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) +#define LBR_RETURN (1 << LBR_RETURN_BIT) +#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) +#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) +#define LBR_FAR (1 << LBR_FAR_BIT) +#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) +#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) + +#define LBR_PLM (LBR_KERNEL | LBR_USER) + +#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ +#define LBR_NOT_SUPP -1 /* LBR filter not supported */ +#define LBR_IGN 0 /* ignored */ + +#define LBR_ANY \ + (LBR_JCC |\ + LBR_REL_CALL |\ + LBR_IND_CALL |\ + LBR_RETURN |\ + LBR_REL_JMP |\ + LBR_IND_JMP |\ + LBR_FAR) + +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) + +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) + +/* + * Intel LBR_CTL bits + * + * Hardware branch filter for Arch LBR + */ +#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */ +#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */ +#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */ +#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */ +#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */ +#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */ +#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */ +#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */ +#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */ +#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */ + +#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT) +#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT) +#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT) +#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT) +#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT) +#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT) +#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT) +#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT) +#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT) +#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT) + +#define ARCH_LBR_ANY \ + (ARCH_LBR_JCC |\ + ARCH_LBR_REL_JMP |\ + ARCH_LBR_IND_JMP |\ + ARCH_LBR_REL_CALL |\ + ARCH_LBR_IND_CALL |\ + ARCH_LBR_RETURN |\ + ARCH_LBR_OTHER_BRANCH) + +#define ARCH_LBR_CTL_MASK 0x7f000e + +static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); + +static __always_inline bool is_lbr_call_stack_bit_set(u64 config) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return !!(config & ARCH_LBR_CALL_STACK); + + return !!(config & LBR_CALL_STACK); +} + +/* + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI + * otherwise it becomes near impossible to get a reliable stack. + */ + +static void __intel_pmu_lbr_enable(bool pmi) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 debugctl, lbr_select = 0, orig_debugctl; + + /* + * No need to unfreeze manually, as v4 can do that as part + * of the GLOBAL_STATUS ack. + */ + if (pmi && x86_pmu.version >= 4) + return; + + /* + * No need to reprogram LBR_SELECT in a PMI, as it + * did not change. + */ + if (cpuc->lbr_sel) + lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) + wrmsrl(MSR_LBR_SELECT, lbr_select); + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + orig_debugctl = debugctl; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + debugctl |= DEBUGCTLMSR_LBR; + /* + * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. + * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions + * may cause superfluous increase/decrease of LBR_TOS. + */ + if (is_lbr_call_stack_bit_set(lbr_select)) + debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + else + debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + + if (orig_debugctl != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); +} + +void intel_pmu_lbr_reset_32(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) + wrmsrl(x86_pmu.lbr_from + i, 0); +} + +void intel_pmu_lbr_reset_64(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrl(x86_pmu.lbr_to + i, 0); + if (x86_pmu.lbr_has_info) + wrmsrl(x86_pmu.lbr_info + i, 0); + } +} + +static void intel_pmu_arch_lbr_reset(void) +{ + /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ + wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); +} + +void intel_pmu_lbr_reset(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + x86_pmu.lbr_reset(); + + cpuc->last_task_ctx = NULL; + cpuc->last_log_id = 0; + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) + wrmsrl(MSR_LBR_SELECT, 0); +} + +/* + * TOS = most recently recorded branch + */ +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + return tos; +} + +enum { + LBR_NONE, + LBR_VALID, +}; + +/* + * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x + * are the TSX flags when TSX is supported, but when TSX is not supported + * they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR format LBR_FORMAT_EIP_FLAGS2 + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support; +} + +static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +static u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Quirk is on when TSX is not enabled. Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static __always_inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static __always_inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static __always_inline void wrlbr_info(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_info + idx, val); +} + +static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) +{ + u64 val; + + if (lbr) + return lbr->from; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) +{ + u64 val; + + if (lbr) + return lbr->to; + + rdmsrl(x86_pmu.lbr_to + idx, val); + + return val; +} + +static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) +{ + u64 val; + + if (lbr) + return lbr->info; + + rdmsrl(x86_pmu.lbr_info + idx, val); + + return val; +} + +static inline void +wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) +{ + wrlbr_from(idx, lbr->from); + wrlbr_to(idx, lbr->to); + if (need_info) + wrlbr_info(idx, lbr->info); +} + +static inline bool +rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) +{ + u64 from = rdlbr_from(idx, NULL); + + /* Don't read invalid entry */ + if (!from) + return false; + + lbr->from = from; + lbr->to = rdlbr_to(idx, NULL); + if (need_info) + lbr->info = rdlbr_info(idx, NULL); + + return true; +} + +void intel_pmu_lbr_restore(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; + u64 tos = task_ctx->tos; + unsigned lbr_idx, mask; + int i; + + mask = x86_pmu.lbr_nr - 1; + for (i = 0; i < task_ctx->valid_lbrs; i++) { + lbr_idx = (tos - i) & mask; + wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info); + } + + for (; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + wrlbr_from(lbr_idx, 0); + wrlbr_to(lbr_idx, 0); + if (need_info) + wrlbr_info(lbr_idx, 0); + } + + wrmsrl(x86_pmu.lbr_tos, tos); + + if (cpuc->lbr_select) + wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); +} + +static void intel_pmu_arch_lbr_restore(void *ctx) +{ + struct x86_perf_task_context_arch_lbr *task_ctx = ctx; + struct lbr_entry *entries = task_ctx->entries; + int i; + + /* Fast reset the LBRs before restore if the call stack is not full. */ + if (!entries[x86_pmu.lbr_nr - 1].from) + intel_pmu_arch_lbr_reset(); + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!entries[i].from) + break; + wrlbr_all(&entries[i], i, true); + } +} + +/* + * Restore the Architecture LBR state from the xsave area in the perf + * context data for the task via the XRSTORS instruction. + */ +static void intel_pmu_arch_lbr_xrstors(void *ctx) +{ + struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; + + xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); +} + +static __always_inline bool lbr_is_reset_in_cstate(void *ctx) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); + + return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); +} + +static void __intel_pmu_lbr_restore(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (task_context_opt(ctx)->lbr_callstack_users == 0 || + task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { + intel_pmu_lbr_reset(); + return; + } + + /* + * Does not restore the LBR registers, if + * - No one else touched them, and + * - Was not cleared in Cstate + */ + if ((ctx == cpuc->last_task_ctx) && + (task_context_opt(ctx)->log_id == cpuc->last_log_id) && + !lbr_is_reset_in_cstate(ctx)) { + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; + return; + } + + x86_pmu.lbr_restore(ctx); + + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; +} + +void intel_pmu_lbr_save(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; + unsigned lbr_idx, mask; + u64 tos; + int i; + + mask = x86_pmu.lbr_nr - 1; + tos = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info)) + break; + } + task_ctx->valid_lbrs = i; + task_ctx->tos = tos; + + if (cpuc->lbr_select) + rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); +} + +static void intel_pmu_arch_lbr_save(void *ctx) +{ + struct x86_perf_task_context_arch_lbr *task_ctx = ctx; + struct lbr_entry *entries = task_ctx->entries; + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!rdlbr_all(&entries[i], i, true)) + break; + } + + /* LBR call stack is not full. Reset is required in restore. */ + if (i < x86_pmu.lbr_nr) + entries[x86_pmu.lbr_nr - 1].from = 0; +} + +/* + * Save the Architecture LBR state to the xsave area in the perf + * context data for the task via the XSAVES instruction. + */ +static void intel_pmu_arch_lbr_xsaves(void *ctx) +{ + struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; + + xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); +} + +static void __intel_pmu_lbr_save(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (task_context_opt(ctx)->lbr_callstack_users == 0) { + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; + return; + } + + x86_pmu.lbr_save(ctx); + + task_context_opt(ctx)->lbr_stack_state = LBR_VALID; + + cpuc->last_task_ctx = ctx; + cpuc->last_log_id = ++task_context_opt(ctx)->log_id; +} + +void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, + struct perf_event_context *next) +{ + void *prev_ctx_data, *next_ctx_data; + + swap(prev->task_ctx_data, next->task_ctx_data); + + /* + * Architecture specific synchronization makes sense in + * case both prev->task_ctx_data and next->task_ctx_data + * pointers are allocated. + */ + + prev_ctx_data = next->task_ctx_data; + next_ctx_data = prev->task_ctx_data; + + if (!prev_ctx_data || !next_ctx_data) + return; + + swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, + task_context_opt(next_ctx_data)->lbr_callstack_users); +} + +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + void *task_ctx; + + if (!cpuc->lbr_users) + return; + + /* + * If LBR callstack feature is enabled and the stack was saved when + * the task was scheduled out, restore the stack. Otherwise flush + * the LBR stack. + */ + task_ctx = ctx ? ctx->task_ctx_data : NULL; + if (task_ctx) { + if (sched_in) + __intel_pmu_lbr_restore(task_ctx); + else + __intel_pmu_lbr_save(task_ctx); + return; + } + + /* + * Since a context switch can flip the address space and LBR entries + * are not tagged with an identifier, we need to wipe the LBR, even for + * per-cpu events. You simply cannot resolve the branches from the old + * address space. + */ + if (sched_in) + intel_pmu_lbr_reset(); +} + +static inline bool branch_user_callstack(unsigned br_sel) +{ + return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); +} + +void intel_pmu_lbr_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) + cpuc->lbr_select = 1; + + cpuc->br_sel = event->hw.branch_reg.reg; + + if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) + task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++; + + /* + * Request pmu::sched_task() callback, which will fire inside the + * regular perf event scheduling, so that call will: + * + * - restore or wipe; when LBR-callstack, + * - wipe; otherwise, + * + * when this is from __perf_event_task_sched_in(). + * + * However, if this is from perf_install_in_context(), no such callback + * will follow and we'll need to reset the LBR here if this is the + * first LBR event. + * + * The problem is, we cannot tell these cases apart... but we can + * exclude the biggest chunk of cases by looking at + * event->total_time_running. An event that has accrued runtime cannot + * be 'new'. Conversely, a new event can get installed through the + * context switch path for the first time. + */ + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users++; + perf_sched_cb_inc(event->ctx->pmu); + if (!cpuc->lbr_users++ && !event->total_time_running) + intel_pmu_lbr_reset(); +} + +void release_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (kmem_cache && cpuc->lbr_xsave) { + kmem_cache_free(kmem_cache, cpuc->lbr_xsave); + cpuc->lbr_xsave = NULL; + } + } +} + +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); + } +} + +void intel_pmu_lbr_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + if (branch_user_callstack(cpuc->br_sel) && + event->ctx->task_ctx_data) + task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--; + + if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) + cpuc->lbr_select = 0; + + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users--; + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); + perf_sched_cb_dec(event->ctx->pmu); +} + +static inline bool vlbr_exclude_host(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + return test_bit(INTEL_PMC_IDX_FIXED_VLBR, + (unsigned long *)&cpuc->intel_ctrl_guest_mask); +} + +void intel_pmu_lbr_enable_all(bool pmi) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (cpuc->lbr_users && !vlbr_exclude_host()) + __intel_pmu_lbr_enable(pmi); +} + +void intel_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (cpuc->lbr_users && !vlbr_exclude_host()) { + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return __intel_pmu_arch_lbr_disable(); + + __intel_pmu_lbr_disable(); + } +} + +void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + struct perf_branch_entry *br = cpuc->lbr_entries; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + unsigned long lbr_idx = (tos - i) & mask; + union { + struct { + u32 from; + u32 to; + }; + u64 lbr; + } msr_lastbranch; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + + perf_clear_branch_entry_bitfields(br); + + br->from = msr_lastbranch.from; + br->to = msr_lastbranch.to; + br++; + } + cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.hw_idx = tos; +} + +/* + * Due to lack of segmentation in Linux the effective address (offset) + * is the same as the linear address, allowing us to merge the LIP and EIP + * LBR formats. + */ +void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) +{ + bool need_info = false, call_stack = false; + unsigned long mask = x86_pmu.lbr_nr - 1; + struct perf_branch_entry *br = cpuc->lbr_entries; + u64 tos = intel_pmu_lbr_tos(); + int i; + int out = 0; + int num = x86_pmu.lbr_nr; + + if (cpuc->lbr_sel) { + need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); + if (cpuc->lbr_sel->config & LBR_CALL_STACK) + call_stack = true; + } + + for (i = 0; i < num; i++) { + unsigned long lbr_idx = (tos - i) & mask; + u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; + u16 cycles = 0; + + from = rdlbr_from(lbr_idx, NULL); + to = rdlbr_to(lbr_idx, NULL); + + /* + * Read LBR call stack entries + * until invalid entry (0s) is detected. + */ + if (call_stack && !from) + break; + + if (x86_pmu.lbr_has_info) { + if (need_info) { + u64 info; + + info = rdlbr_info(lbr_idx, NULL); + mis = !!(info & LBR_INFO_MISPRED); + pred = !mis; + cycles = (info & LBR_INFO_CYCLES); + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(info & LBR_INFO_IN_TX); + abort = !!(info & LBR_INFO_ABORT); + } + } + } else { + int skip = 0; + + if (x86_pmu.lbr_from_flags) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + } + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; + } + from = (u64)((((s64)from) << skip) >> skip); + + if (x86_pmu.lbr_to_cycles) { + cycles = ((to >> 48) & LBR_INFO_CYCLES); + to = (u64)((((s64)to) << 16) >> 16); + } + } + + /* + * Some CPUs report duplicated abort records, + * with the second entry not having an abort bit set. + * Skip them here. This loop runs backwards, + * so we need to undo the previous record. + * If the abort just happened outside the window + * the extra entry cannot be removed. + */ + if (abort && x86_pmu.lbr_double_abort && out > 0) + out--; + + perf_clear_branch_entry_bitfields(br+out); + br[out].from = from; + br[out].to = to; + br[out].mispred = mis; + br[out].predicted = pred; + br[out].in_tx = in_tx; + br[out].abort = abort; + br[out].cycles = cycles; + out++; + } + cpuc->lbr_stack.nr = out; + cpuc->lbr_stack.hw_idx = tos; +} + +static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_type); + +static __always_inline int get_lbr_br_type(u64 info) +{ + int type = 0; + + if (static_branch_likely(&x86_lbr_type)) + type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + + return type; +} + +static __always_inline bool get_lbr_mispred(u64 info) +{ + bool mispred = 0; + + if (static_branch_likely(&x86_lbr_mispred)) + mispred = !!(info & LBR_INFO_MISPRED); + + return mispred; +} + +static __always_inline u16 get_lbr_cycles(u64 info) +{ + u16 cycles = info & LBR_INFO_CYCLES; + + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && + (!static_branch_likely(&x86_lbr_cycles) || + !(info & LBR_INFO_CYC_CNT_VALID))) + cycles = 0; + + return cycles; +} + +static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, + struct lbr_entry *entries) +{ + struct perf_branch_entry *e; + struct lbr_entry *lbr; + u64 from, to, info; + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr = entries ? &entries[i] : NULL; + e = &cpuc->lbr_entries[i]; + + from = rdlbr_from(i, lbr); + /* + * Read LBR entries until invalid entry (0s) is detected. + */ + if (!from) + break; + + to = rdlbr_to(i, lbr); + info = rdlbr_info(i, lbr); + + perf_clear_branch_entry_bitfields(e); + + e->from = from; + e->to = to; + e->mispred = get_lbr_mispred(info); + e->predicted = !e->mispred; + e->in_tx = !!(info & LBR_INFO_IN_TX); + e->abort = !!(info & LBR_INFO_ABORT); + e->cycles = get_lbr_cycles(info); + e->type = get_lbr_br_type(info); + } + + cpuc->lbr_stack.nr = i; +} + +static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) +{ + intel_pmu_store_lbr(cpuc, NULL); +} + +static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) +{ + struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; + + if (!xsave) { + intel_pmu_store_lbr(cpuc, NULL); + return; + } + xsaves(&xsave->xsave, XFEATURE_MASK_LBR); + + intel_pmu_store_lbr(cpuc, xsave->lbr.entries); +} + +void intel_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * Don't read when all LBRs users are using adaptive PEBS. + * + * This could be smarter and actually check the event, + * but this simple approach seems to work for now. + */ + if (!cpuc->lbr_users || vlbr_exclude_host() || + cpuc->lbr_users == cpuc->lbr_pebs_users) + return; + + x86_pmu.lbr_read(cpuc); + + intel_pmu_lbr_filter(cpuc); +} + +/* + * SW filter is used: + * - in case there is no HW filter + * - in case the HW filter has errata or limitations + */ +static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +{ + u64 br_type = event->attr.branch_sample_type; + int mask = 0; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* we ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) + mask |= X86_BR_ABORT; + + if (br_type & PERF_SAMPLE_BRANCH_IN_TX) + mask |= X86_BR_IN_TX; + + if (br_type & PERF_SAMPLE_BRANCH_NO_TX) + mask |= X86_BR_NO_TX; + + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + + if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { + if (!x86_pmu_has_lbr_callstack()) + return -EOPNOTSUPP; + if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) + return -EINVAL; + mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | + X86_BR_CALL_STACK; + } + + if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) + mask |= X86_BR_IND_JMP; + + if (br_type & PERF_SAMPLE_BRANCH_CALL) + mask |= X86_BR_CALL | X86_BR_ZERO_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) + mask |= X86_BR_TYPE_SAVE; + + /* + * stash actual user request into reg, it may + * be used by fixup code for some CPU + */ + event->hw.branch_reg.reg = mask; + return 0; +} + +/* + * setup the HW LBR filter + * Used only when available, may not be enough to disambiguate + * all branches, may need the help of the SW filter + */ +static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, v; + int i; + + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + if (!(br_type & (1ULL << i))) + continue; + + v = x86_pmu.lbr_sel_map[i]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGN) + mask |= v; + } + + reg = &event->hw.branch_reg; + reg->idx = EXTRA_REG_LBR; + + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + reg->config = mask; + + /* + * The Arch LBR HW can retrieve the common branch types + * from the LBR_INFO. It doesn't require the high overhead + * SW disassemble. + * Enable the branch type by default for the Arch LBR. + */ + reg->reg |= X86_BR_TYPE_SAVE; + return 0; + } + + /* + * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate + * in suppress mode. So LBR_SELECT should be set to + * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + * But the 10th bit LBR_CALL_STACK does not operate + * in suppress mode. + */ + reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); + + if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && + (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && + x86_pmu.lbr_has_info) + reg->config |= LBR_NO_INFO; + + return 0; +} + +int intel_pmu_setup_lbr_filter(struct perf_event *event) +{ + int ret = 0; + + /* + * no LBR on this PMU + */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* + * setup SW LBR filter + */ + ret = intel_pmu_setup_sw_lbr_filter(event); + if (ret) + return ret; + + /* + * setup HW LBR filter, if any + */ + if (x86_pmu.lbr_sel_map) + ret = intel_pmu_setup_hw_lbr_filter(event); + + return ret; +} + +enum { + ARCH_LBR_BR_TYPE_JCC = 0, + ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, + ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, + ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, + ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, + ARCH_LBR_BR_TYPE_NEAR_RET = 5, + ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, + + ARCH_LBR_BR_TYPE_MAP_MAX = 16, +}; + +static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = { + [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC, + [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP, + [ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP, + [ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL, + [ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL, + [ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET, +}; + +/* + * implement actual branch filter based on user demand. + * Hardware may not exactly satisfy that request, thus + * we need to inspect opcodes. Mismatched branches are + * discarded. Therefore, the number of branches returned + * in PERF_SAMPLE_BRANCH_STACK sample may vary. + */ +static void +intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) +{ + u64 from, to; + int br_sel = cpuc->br_sel; + int i, j, type, to_plm; + bool compress = false; + + /* if sampling all branches, then nothing to filter */ + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) + return; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + type = cpuc->lbr_entries[i].type; + + /* + * Parse the branch type recorded in LBR_x_INFO MSR. + * Doesn't support OTHER_BRANCH decoding for now. + * OTHER_BRANCH branch type still rely on software decoding. + */ + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && + type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) { + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + type = arch_lbr_br_type_map[type] | to_plm; + } else + type = branch_type(from, to, cpuc->lbr_entries[i].abort); + if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { + if (cpuc->lbr_entries[i].in_tx) + type |= X86_BR_IN_TX; + else + type |= X86_BR_NO_TX; + } + + /* if type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; + compress = true; + } + + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) + cpuc->lbr_entries[i].type = common_branch_type(type); + } + + if (!compress) + return; + + /* remove all entries with from=0 */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* Cannot get TOS for large PEBS and Arch LBR */ + if (static_cpu_has(X86_FEATURE_ARCH_LBR) || + (cpuc->n_pebs == cpuc->n_large_pebs)) + cpuc->lbr_stack.hw_idx = -1ULL; + else + cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); + + intel_pmu_store_lbr(cpuc, lbr); + intel_pmu_lbr_filter(cpuc); +} + +/* + * Map interface branch filters onto LBR filters + */ +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches + */ + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = + LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include IND_JMP to capture IND_CALL + */ + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, +}; + +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, +}; + +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_RETURN | LBR_CALL_STACK, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, +}; + +static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN | + ARCH_LBR_OTHER_BRANCH, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL | + ARCH_LBR_IND_CALL | + ARCH_LBR_OTHER_BRANCH, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC, + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL | + ARCH_LBR_IND_CALL | + ARCH_LBR_RETURN | + ARCH_LBR_CALL_STACK, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL, +}; + +/* core */ +void __init intel_pmu_lbr_init_core(void) +{ + x86_pmu.lbr_nr = 4; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ +} + +/* nehalem/westmere */ +void __init intel_pmu_lbr_init_nhm(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - workaround LBR_SEL errata (see above) + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ +} + +/* sandy bridge */ +void __init intel_pmu_lbr_init_snb(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* + * SW branch filter usage: + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ +} + +static inline struct kmem_cache * +create_lbr_kmem_cache(size_t size, size_t align) +{ + return kmem_cache_create("x86_lbr", size, align, 0, NULL); +} + +/* haswell */ +void intel_pmu_lbr_init_hsw(void) +{ + size_t size = sizeof(struct x86_perf_task_context); + + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); +} + +/* skylake */ +__init void intel_pmu_lbr_init_skl(void) +{ + size_t size = sizeof(struct x86_perf_task_context); + + x86_pmu.lbr_nr = 32; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + x86_pmu.lbr_info = MSR_LBR_INFO_0; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); + + /* + * SW branch filter usage: + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ +} + +/* atom */ +void __init intel_pmu_lbr_init_atom(void) +{ + /* + * only models starting at stepping 10 seems + * to have an operational LBR which can freeze + * on PMU interrupt + */ + if (boot_cpu_data.x86_model == 28 + && boot_cpu_data.x86_stepping < 10) { + pr_cont("LBR disabled due to erratum"); + return; + } + + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ +} + +/* slm */ +void __init intel_pmu_lbr_init_slm(void) +{ + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("8-deep LBR, "); +} + +/* Knights Landing */ +void intel_pmu_lbr_init_knl(void) +{ + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* Knights Landing does have MISPREDICT bit */ + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) + x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; +} + +void intel_pmu_lbr_init(void) +{ + switch (x86_pmu.intel_cap.lbr_format) { + case LBR_FORMAT_EIP_FLAGS2: + x86_pmu.lbr_has_tsx = 1; + x86_pmu.lbr_from_flags = 1; + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); + break; + + case LBR_FORMAT_EIP_FLAGS: + x86_pmu.lbr_from_flags = 1; + break; + + case LBR_FORMAT_INFO: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_INFO2: + x86_pmu.lbr_has_info = 1; + break; + + case LBR_FORMAT_TIME: + x86_pmu.lbr_from_flags = 1; + x86_pmu.lbr_to_cycles = 1; + break; + } + + if (x86_pmu.lbr_has_info) { + /* + * Only used in combination with baseline pebs. + */ + static_branch_enable(&x86_lbr_mispred); + static_branch_enable(&x86_lbr_cycles); + } +} + +/* + * LBR state size is variable based on the max number of registers. + * This calculates the expected state size, which should match + * what the hardware enumerates for the size of XFEATURE_LBR. + */ +static inline unsigned int get_lbr_state_size(void) +{ + return sizeof(struct arch_lbr_state) + + x86_pmu.lbr_nr * sizeof(struct lbr_entry); +} + +static bool is_arch_lbr_xsave_available(void) +{ + if (!boot_cpu_has(X86_FEATURE_XSAVES)) + return false; + + /* + * Check the LBR state with the corresponding software structure. + * Disable LBR XSAVES support if the size doesn't match. + */ + if (xfeature_size(XFEATURE_LBR) == 0) + return false; + + if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) + return false; + + return true; +} + +void __init intel_pmu_arch_lbr_init(void) +{ + struct pmu *pmu = x86_get_pmu(smp_processor_id()); + union cpuid28_eax eax; + union cpuid28_ebx ebx; + union cpuid28_ecx ecx; + unsigned int unused_edx; + bool arch_lbr_xsave; + size_t size; + u64 lbr_nr; + + /* Arch LBR Capabilities */ + cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx); + + lbr_nr = fls(eax.split.lbr_depth_mask) * 8; + if (!lbr_nr) + goto clear_arch_lbr; + + /* Apply the max depth of Arch LBR */ + if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) + goto clear_arch_lbr; + + x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; + x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset; + x86_pmu.lbr_lip = eax.split.lbr_lip; + x86_pmu.lbr_cpl = ebx.split.lbr_cpl; + x86_pmu.lbr_filter = ebx.split.lbr_filter; + x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack; + x86_pmu.lbr_mispred = ecx.split.lbr_mispred; + x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; + x86_pmu.lbr_br_type = ecx.split.lbr_br_type; + x86_pmu.lbr_nr = lbr_nr; + + if (x86_pmu.lbr_mispred) + static_branch_enable(&x86_lbr_mispred); + if (x86_pmu.lbr_timed_lbr) + static_branch_enable(&x86_lbr_cycles); + if (x86_pmu.lbr_br_type) + static_branch_enable(&x86_lbr_type); + + arch_lbr_xsave = is_arch_lbr_xsave_available(); + if (arch_lbr_xsave) { + size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) + + get_lbr_state_size(); + pmu->task_ctx_cache = create_lbr_kmem_cache(size, + XSAVE_ALIGNMENT); + } + + if (!pmu->task_ctx_cache) { + arch_lbr_xsave = false; + + size = sizeof(struct x86_perf_task_context_arch_lbr) + + lbr_nr * sizeof(struct lbr_entry); + pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0); + } + + x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; + x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; + x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0; + + /* LBR callstack requires both CPL and Branch Filtering support */ + if (!x86_pmu.lbr_cpl || + !x86_pmu.lbr_filter || + !x86_pmu.lbr_call_stack) + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP; + + if (!x86_pmu.lbr_cpl) { + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP; + } else if (!x86_pmu.lbr_filter) { + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP; + } + + x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK; + x86_pmu.lbr_ctl_map = arch_lbr_ctl_map; + + if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter) + x86_pmu.lbr_ctl_map = NULL; + + x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset; + if (arch_lbr_xsave) { + x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves; + x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors; + x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave; + pr_cont("XSAVE "); + } else { + x86_pmu.lbr_save = intel_pmu_arch_lbr_save; + x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore; + x86_pmu.lbr_read = intel_pmu_arch_lbr_read; + } + + pr_cont("Architectural LBR, "); + + return; + +clear_arch_lbr: + setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); +} + +/** + * x86_perf_get_lbr - get the LBR records information + * + * @lbr: the caller's memory to store the LBR records information + */ +void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +{ + int lbr_fmt = x86_pmu.intel_cap.lbr_format; + + lbr->nr = x86_pmu.lbr_nr; + lbr->from = x86_pmu.lbr_from; + lbr->to = x86_pmu.lbr_to; + lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; +} +EXPORT_SYMBOL_GPL(x86_perf_get_lbr); + +struct event_constraint vlbr_constraint = + __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), + FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT); diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c new file mode 100644 index 000000000..03bbcc2fa --- /dev/null +++ b/arch/x86/events/intel/p4.c @@ -0,0 +1,1404 @@ +/* + * Netburst Performance Events (P4, old Xeon) + * + * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov + * Copyright (C) 2010 Intel Corporation, Lin Ming + * + * For licencing details see kernel-base/COPYING + */ + +#include + +#include +#include +#include + +#include "../perf_event.h" + +#define P4_CNTR_LIMIT 3 +/* + * array indices: 0,1 - HT threads, used with HT enabled cpu + */ +struct p4_event_bind { + unsigned int opcode; /* Event code and ESCR selector */ + unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ + char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ +}; + +struct p4_pebs_bind { + unsigned int metric_pebs; + unsigned int metric_vert; +}; + +/* it sets P4_PEBS_ENABLE_UOP_TAG as well */ +#define P4_GEN_PEBS_BIND(name, pebs, vert) \ + [P4_PEBS_METRIC__##name] = { \ + .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ + .metric_vert = vert, \ + } + +/* + * note we have P4_PEBS_ENABLE_UOP_TAG always set here + * + * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of + * event configuration to find out which values are to be + * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT + * registers + */ +static struct p4_pebs_bind p4_pebs_bind_map[] = { + P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), + P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), + P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), + P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), + P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), + P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), + P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), + P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), + P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), +}; + +/* + * Note that we don't use CCCR1 here, there is an + * exception for P4_BSQ_ALLOCATION but we just have + * no workaround + * + * consider this binding as resources which particular + * event may borrow, it doesn't contain EventMask, + * Tags and friends -- they are left to a caller + */ +static struct p4_event_bind p4_event_bind_map[] = { + [P4_EVENT_TC_DELIVER_MODE] = { + .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), + .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_BPU_FETCH_REQUEST] = { + .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), + .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_ITLB_REFERENCE] = { + .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_MEMORY_CANCEL] = { + .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), + .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_MEMORY_COMPLETE] = { + .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), + .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_LOAD_PORT_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), + .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_STORE_PORT_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), + .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_MOB_LOAD_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), + .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_PAGE_WALK_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), + .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BSQ_CACHE_REFERENCE] = { + .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_IOQ_ALLOCATION] = { + .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ + .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), + .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), + .cntr = { {2, -1, -1}, {3, -1, -1} }, + }, + [P4_EVENT_FSB_DATA_ACTIVITY] = { + .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ + .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), + .cntr = { {0, -1, -1}, {1, -1, -1} }, + }, + [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ + .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), + .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), + .cntr = { {2, -1, -1}, {3, -1, -1} }, + }, + [P4_EVENT_SSE_INPUT_ASSIST] = { + .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_PACKED_SP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_PACKED_DP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_SCALAR_SP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_SCALAR_DP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_64BIT_MMX_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_128BIT_MMX_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_X87_FP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_TC_MISC] = { + .opcode = P4_OPCODE(P4_EVENT_TC_MISC), + .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_GLOBAL_POWER_EVENTS] = { + .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_TC_MS_XFER] = { + .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), + .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_UOP_QUEUE_WRITES] = { + .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), + .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), + .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RETIRED_BRANCH_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), + .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RESOURCE_STALL] = { + .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), + .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_WC_BUFFER] = { + .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), + .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), + .shared = 1, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_B2B_CYCLES] = { + .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BNR] = { + .opcode = P4_OPCODE(P4_EVENT_BNR), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_SNOOP] = { + .opcode = P4_OPCODE(P4_EVENT_SNOOP), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_RESPONSE] = { + .opcode = P4_OPCODE(P4_EVENT_RESPONSE), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_FRONT_END_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_EXECUTION_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_REPLAY_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_INSTR_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_UOPS_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_UOP_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), + .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_BRANCH_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_MISPRED_BRANCH_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_X87_ASSIST] = { + .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_MACHINE_CLEAR] = { + .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_INSTR_COMPLETED] = { + .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, +}; + +#define P4_GEN_CACHE_EVENT(event, bit, metric) \ + p4_config_pack_escr(P4_ESCR_EVENT(event) | \ + P4_ESCR_EMASK_BIT(event, bit)) | \ + p4_config_pack_cccr(metric | \ + P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) + +static __initconst const u64 p4_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_PEBS_METRIC__1stl_cache_load_miss_retired), + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_PEBS_METRIC__2ndl_cache_load_miss_retired), + }, +}, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_PEBS_METRIC__dtlb_load_miss_retired), + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_PEBS_METRIC__dtlb_store_miss_retired), + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, + P4_PEBS_METRIC__none), + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, + P4_PEBS_METRIC__none), + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +/* + * Because of Netburst being quite restricted in how many + * identical events may run simultaneously, we introduce event aliases, + * ie the different events which have the same functionality but + * utilize non-intersected resources (ESCR/CCCR/counter registers). + * + * This allow us to relax restrictions a bit and run two or more + * identical events together. + * + * Never set any custom internal bits such as P4_CONFIG_HT, + * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are + * either up to date automatically or not applicable at all. + */ +static struct p4_event_alias { + u64 original; + u64 alternative; +} p4_event_aliases[] = { + { + /* + * Non-halted cycles can be substituted with non-sleeping cycles (see + * Intel SDM Vol3b for details). We need this alias to be able + * to run nmi-watchdog and 'perf top' (or any other user space tool + * which is interested in running PERF_COUNT_HW_CPU_CYCLES) + * simultaneously. + */ + .original = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), + .alternative = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| + p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | + P4_CCCR_COMPARE), + }, +}; + +static u64 p4_get_alias_event(u64 config) +{ + u64 config_match; + int i; + + /* + * Only event with special mark is allowed, + * we're to be sure it didn't come as malformed + * RAW event. + */ + if (!(config & P4_CONFIG_ALIASABLE)) + return 0; + + config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; + + for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { + if (config_match == p4_event_aliases[i].original) { + config_match = p4_event_aliases[i].alternative; + break; + } else if (config_match == p4_event_aliases[i].alternative) { + config_match = p4_event_aliases[i].original; + break; + } + } + + if (i >= ARRAY_SIZE(p4_event_aliases)) + return 0; + + return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); +} + +static u64 p4_general_events[PERF_COUNT_HW_MAX] = { + /* non-halted CPU clocks */ + [PERF_COUNT_HW_CPU_CYCLES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | + P4_CONFIG_ALIASABLE, + + /* + * retired instructions + * in a sake of simplicity we don't use the FSB tagging + */ + [PERF_COUNT_HW_INSTRUCTIONS] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), + + /* cache hits */ + [PERF_COUNT_HW_CACHE_REFERENCES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), + + /* cache misses */ + [PERF_COUNT_HW_CACHE_MISSES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), + + /* branch instructions retired */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), + + /* mispredicted branches retired */ + [PERF_COUNT_HW_BRANCH_MISSES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), + + /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ + [PERF_COUNT_HW_BUS_CYCLES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | + p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), +}; + +static struct p4_event_bind *p4_config_get_bind(u64 config) +{ + unsigned int evnt = p4_config_unpack_event(config); + struct p4_event_bind *bind = NULL; + + if (evnt < ARRAY_SIZE(p4_event_bind_map)) + bind = &p4_event_bind_map[evnt]; + + return bind; +} + +static u64 p4_pmu_event_map(int hw_event) +{ + struct p4_event_bind *bind; + unsigned int esel; + u64 config; + + config = p4_general_events[hw_event]; + bind = p4_config_get_bind(config); + esel = P4_OPCODE_ESEL(bind->opcode); + config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); + + return config; +} + +/* check cpu model specifics */ +static bool p4_event_match_cpu_model(unsigned int event_idx) +{ + /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ + if (event_idx == P4_EVENT_INSTR_COMPLETED) { + if (boot_cpu_data.x86_model != 3 && + boot_cpu_data.x86_model != 4 && + boot_cpu_data.x86_model != 6) + return false; + } + + /* + * For info + * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 + */ + + return true; +} + +static int p4_validate_raw_event(struct perf_event *event) +{ + unsigned int v, emask; + + /* User data may have out-of-bound event index */ + v = p4_config_unpack_event(event->attr.config); + if (v >= ARRAY_SIZE(p4_event_bind_map)) + return -EINVAL; + + /* It may be unsupported: */ + if (!p4_event_match_cpu_model(v)) + return -EINVAL; + + /* + * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as + * in Architectural Performance Monitoring, it means not + * on _which_ logical cpu to count but rather _when_, ie it + * depends on logical cpu state -- count event if one cpu active, + * none, both or any, so we just allow user to pass any value + * desired. + * + * In turn we always set Tx_OS/Tx_USR bits bound to logical + * cpu without their propagation to another cpu + */ + + /* + * if an event is shared across the logical threads + * the user needs special permissions to be able to use it + */ + if (p4_ht_active() && p4_event_bind_map[v].shared) { + v = perf_allow_cpu(&event->attr); + if (v) + return v; + } + + /* ESCR EventMask bits may be invalid */ + emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; + if (emask & ~p4_event_bind_map[v].escr_emask) + return -EINVAL; + + /* + * it may have some invalid PEBS bits + */ + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) + return -EINVAL; + + v = p4_config_unpack_metric(event->attr.config); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) + return -EINVAL; + + return 0; +} + +static int p4_hw_config(struct perf_event *event) +{ + int cpu = get_cpu(); + int rc = 0; + u32 escr, cccr; + + /* + * the reason we use cpu that early is that: if we get scheduled + * first time on the same cpu -- we will not need swap thread + * specific flags in config (and will save some cpu cycles) + */ + + cccr = p4_default_cccr_conf(cpu); + escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, + event->attr.exclude_user); + event->hw.config = p4_config_pack_escr(escr) | + p4_config_pack_cccr(cccr); + + if (p4_ht_active() && p4_ht_thread(cpu)) + event->hw.config = p4_set_ht_bit(event->hw.config); + + if (event->attr.type == PERF_TYPE_RAW) { + struct p4_event_bind *bind; + unsigned int esel; + /* + * Clear bits we reserve to be managed by kernel itself + * and never allowed from a user space + */ + event->attr.config &= P4_CONFIG_MASK; + + rc = p4_validate_raw_event(event); + if (rc) + goto out; + + /* + * Note that for RAW events we allow user to use P4_CCCR_RESERVED + * bits since we keep additional info here (for cache events and etc) + */ + event->hw.config |= event->attr.config; + bind = p4_config_get_bind(event->attr.config); + if (!bind) { + rc = -EINVAL; + goto out; + } + esel = P4_OPCODE_ESEL(bind->opcode); + event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); + } + + rc = x86_setup_perfctr(event); +out: + put_cpu(); + return rc; +} + +static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) +{ + u64 v; + + /* an official way for overflow indication */ + rdmsrl(hwc->config_base, v); + if (v & P4_CCCR_OVF) { + wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); + return 1; + } + + /* + * In some circumstances the overflow might issue an NMI but did + * not set P4_CCCR_OVF bit. Because a counter holds a negative value + * we simply check for high bit being set, if it's cleared it means + * the counter has reached zero value and continued counting before + * real NMI signal was received: + */ + rdmsrl(hwc->event_base, v); + if (!(v & ARCH_P4_UNFLAGGED_BIT)) + return 1; + + return 0; +} + +static void p4_pmu_disable_pebs(void) +{ + /* + * FIXME + * + * It's still allowed that two threads setup same cache + * events so we can't simply clear metrics until we knew + * no one is depending on us, so we need kind of counter + * for "ReplayEvent" users. + * + * What is more complex -- RAW events, if user (for some + * reason) will pass some cache event metric with improper + * event opcode -- it's fine from hardware point of view + * but completely nonsense from "meaning" of such action. + * + * So at moment let leave metrics turned on forever -- it's + * ok for now but need to be revisited! + * + * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0); + * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0); + */ +} + +static inline void p4_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * If event gets disabled while counter is in overflowed + * state we need to clear P4_CCCR_OVF, otherwise interrupt get + * asserted again and again + */ + (void)wrmsrl_safe(hwc->config_base, + p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); +} + +static void p4_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + p4_pmu_disable_event(event); + } + + p4_pmu_disable_pebs(); +} + +/* configuration must be valid */ +static void p4_pmu_enable_pebs(u64 config) +{ + struct p4_pebs_bind *bind; + unsigned int idx; + + BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); + + idx = p4_config_unpack_metric(config); + if (idx == P4_PEBS_METRIC__none) + return; + + bind = &p4_pebs_bind_map[idx]; + + (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); + (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); +} + +static void __p4_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int thread = p4_ht_config_thread(hwc->config); + u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); + unsigned int idx = p4_config_unpack_event(hwc->config); + struct p4_event_bind *bind; + u64 escr_addr, cccr; + + bind = &p4_event_bind_map[idx]; + escr_addr = bind->escr_msr[thread]; + + /* + * - we dont support cascaded counters yet + * - and counter 1 is broken (erratum) + */ + WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); + WARN_ON_ONCE(hwc->idx == 1); + + /* we need a real Event value */ + escr_conf &= ~P4_ESCR_EVENT_MASK; + escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); + + cccr = p4_config_unpack_cccr(hwc->config); + + /* + * it could be Cache event so we need to write metrics + * into additional MSRs + */ + p4_pmu_enable_pebs(hwc->config); + + (void)wrmsrl_safe(escr_addr, escr_conf); + (void)wrmsrl_safe(hwc->config_base, + (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); +} + +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running); + +static void p4_pmu_enable_event(struct perf_event *event) +{ + int idx = event->hw.idx; + + __set_bit(idx, per_cpu(p4_running, smp_processor_id())); + __p4_pmu_enable_event(event); +} + +static void p4_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + __p4_pmu_enable_event(event); + } +} + +static int p4_pmu_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = this_cpu_read(pmc_prev_left[hwc->idx]); + int ret; + + ret = x86_perf_event_set_period(event); + + if (hwc->event_base) { + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + } + + return ret; +} + +static int p4_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + cpuc = this_cpu_ptr(&cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + int overflow; + + if (!test_bit(idx, cpuc->active_mask)) { + /* catch in-flight IRQs */ + if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id()))) + handled++; + continue; + } + + event = cpuc->events[idx]; + hwc = &event->hw; + + WARN_ON_ONCE(hwc->idx != idx); + + /* it might be unflagged overflow */ + overflow = p4_pmu_clear_cccr_ovf(hwc); + + val = x86_perf_event_update(event); + if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) + continue; + + handled += overflow; + + /* event overflow for sure */ + perf_sample_data_init(&data, 0, hwc->last_period); + + if (!static_call(x86_pmu_set_period)(event)) + continue; + + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + /* + * When dealing with the unmasking of the LVTPC on P4 perf hw, it has + * been observed that the OVF bit flag has to be cleared first _before_ + * the LVTPC can be unmasked. + * + * The reason is the NMI line will continue to be asserted while the OVF + * bit is set. This causes a second NMI to generate if the LVTPC is + * unmasked before the OVF bit is cleared, leading to unknown NMI + * messages. + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + + return handled; +} + +/* + * swap thread specific fields according to a thread + * we are going to run on + */ +static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) +{ + u32 escr, cccr; + + /* + * we either lucky and continue on same cpu or no HT support + */ + if (!p4_should_swap_ts(hwc->config, cpu)) + return; + + /* + * the event is migrated from an another logical + * cpu, so we need to swap thread specific flags + */ + + escr = p4_config_unpack_escr(hwc->config); + cccr = p4_config_unpack_cccr(hwc->config); + + if (p4_ht_thread(cpu)) { + cccr &= ~P4_CCCR_OVF_PMI_T0; + cccr |= P4_CCCR_OVF_PMI_T1; + if (escr & P4_ESCR_T0_OS) { + escr &= ~P4_ESCR_T0_OS; + escr |= P4_ESCR_T1_OS; + } + if (escr & P4_ESCR_T0_USR) { + escr &= ~P4_ESCR_T0_USR; + escr |= P4_ESCR_T1_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config |= P4_CONFIG_HT; + } else { + cccr &= ~P4_CCCR_OVF_PMI_T1; + cccr |= P4_CCCR_OVF_PMI_T0; + if (escr & P4_ESCR_T1_OS) { + escr &= ~P4_ESCR_T1_OS; + escr |= P4_ESCR_T0_OS; + } + if (escr & P4_ESCR_T1_USR) { + escr &= ~P4_ESCR_T1_USR; + escr |= P4_ESCR_T0_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config &= ~P4_CONFIG_HT; + } +} + +/* + * ESCR address hashing is tricky, ESCRs are not sequential + * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and + * the metric between any ESCRs is laid in range [0xa0,0xe1] + * + * so we make ~70% filled hashtable + */ + +#define P4_ESCR_MSR_BASE 0x000003a0 +#define P4_ESCR_MSR_MAX 0x000003e1 +#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) +#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) +#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr + +static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), + P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), +}; + +static int p4_get_escr_idx(unsigned int addr) +{ + unsigned int idx = P4_ESCR_MSR_IDX(addr); + + if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || + !p4_escr_table[idx] || + p4_escr_table[idx] != addr)) { + WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); + return -1; + } + + return idx; +} + +static int p4_next_cntr(int thread, unsigned long *used_mask, + struct p4_event_bind *bind) +{ + int i, j; + + for (i = 0; i < P4_CNTR_LIMIT; i++) { + j = bind->cntr[thread][i]; + if (j != -1 && !test_bit(j, used_mask)) + return j; + } + + return -1; +} + +static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; + int cpu = smp_processor_id(); + struct hw_perf_event *hwc; + struct p4_event_bind *bind; + unsigned int i, thread, num; + int cntr_idx, escr_idx; + u64 config_alias; + int pass; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); + + for (i = 0, num = n; i < n; i++, num--) { + + hwc = &cpuc->event_list[i]->hw; + thread = p4_ht_thread(cpu); + pass = 0; + +again: + /* + * It's possible to hit a circular lock + * between original and alternative events + * if both are scheduled already. + */ + if (pass > 2) + goto done; + + bind = p4_config_get_bind(hwc->config); + escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); + if (unlikely(escr_idx == -1)) + goto done; + + if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { + cntr_idx = hwc->idx; + if (assign) + assign[i] = hwc->idx; + goto reserve; + } + + cntr_idx = p4_next_cntr(thread, used_mask, bind); + if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { + /* + * Check whether an event alias is still available. + */ + config_alias = p4_get_alias_event(hwc->config); + if (!config_alias) + goto done; + hwc->config = config_alias; + pass++; + goto again; + } + /* + * Perf does test runs to see if a whole group can be assigned + * together successfully. There can be multiple rounds of this. + * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config + * bits, such that the next round of group assignments will + * cause the above p4_should_swap_ts to pass instead of fail. + * This leads to counters exclusive to thread0 being used by + * thread1. + * + * Solve this with a cheap hack, reset the idx back to -1 to + * force a new lookup (p4_next_cntr) to get the right counter + * for the right thread. + * + * This probably doesn't comply with the general spirit of how + * perf wants to work, but P4 is special. :-( + */ + if (p4_should_swap_ts(hwc->config, cpu)) + hwc->idx = -1; + p4_pmu_swap_config_ts(hwc, cpu); + if (assign) + assign[i] = cntr_idx; +reserve: + set_bit(cntr_idx, used_mask); + set_bit(escr_idx, escr_mask); + } + +done: + return num ? -EINVAL : 0; +} + +PMU_FORMAT_ATTR(cccr, "config:0-31" ); +PMU_FORMAT_ATTR(escr, "config:32-62"); +PMU_FORMAT_ATTR(ht, "config:63" ); + +static struct attribute *intel_p4_formats_attr[] = { + &format_attr_cccr.attr, + &format_attr_escr.attr, + &format_attr_ht.attr, + NULL, +}; + +static __initconst const struct x86_pmu p4_pmu = { + .name = "Netburst P4/Xeon", + .handle_irq = p4_pmu_handle_irq, + .disable_all = p4_pmu_disable_all, + .enable_all = p4_pmu_enable_all, + .enable = p4_pmu_enable_event, + .disable = p4_pmu_disable_event, + + .set_period = p4_pmu_set_period, + + .eventsel = MSR_P4_BPU_CCCR0, + .perfctr = MSR_P4_BPU_PERFCTR0, + .event_map = p4_pmu_event_map, + .max_events = ARRAY_SIZE(p4_general_events), + .get_event_constraints = x86_get_event_constraints, + /* + * IF HT disabled we may need to use all + * ARCH_P4_MAX_CCCR counters simultaneously + * though leave it restricted at moment assuming + * HT is on + */ + .num_counters = ARCH_P4_MAX_CCCR, + .apic = 1, + .cntval_bits = ARCH_P4_CNTRVAL_BITS, + .cntval_mask = ARCH_P4_CNTRVAL_MASK, + .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, + .hw_config = p4_hw_config, + .schedule_events = p4_pmu_schedule_events, + + .format_attrs = intel_p4_formats_attr, +}; + +__init int p4_pmu_init(void) +{ + unsigned int low, high; + int i, reg; + + /* If we get stripped -- indexing fails */ + BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (!(low & (1 << 7))) { + pr_cont("unsupported Netburst CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Netburst events, "); + + x86_pmu = p4_pmu; + + /* + * Even though the counters are configured to interrupt a particular + * logical processor when an overflow happens, testing has shown that + * on kdump kernels (which uses a single cpu), thread1's counter + * continues to run and will report an NMI on thread0. Due to the + * overflow bug, this leads to a stream of unknown NMIs. + * + * Solve this by zero'ing out the registers to mimic a reset. + */ + for (i = 0; i < x86_pmu.num_counters; i++) { + reg = x86_pmu_config_addr(i); + wrmsrl_safe(reg, 0ULL); + } + + return 0; +} diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c new file mode 100644 index 000000000..408879b0c --- /dev/null +++ b/arch/x86/events/intel/p6.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "../perf_event.h" + +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */ + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */ + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */ + +}; + +static const u64 __initconst p6_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static u64 p6_pmu_event_map(int hw_event) +{ + return p6_perfmon_event_map[hw_event]; +} + +/* + * Event setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_EVENT 0x0000002EULL + +static struct event_constraint p6_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; + +static void p6_pmu_disable_all(void) +{ + u64 val; + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void p6_pmu_enable_all(int added) +{ + unsigned long val; + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static inline void +p6_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val = P6_NOP_EVENT; + + (void)wrmsrl_safe(hwc->config_base, val); +} + +static void p6_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + + /* + * p6 only has a global event enable, set on PerfEvtSel0 + * We "disable" events by programming P6_NOP_EVENT + * and we rely on p6_pmu_enable_all() being called + * to actually enable the events. + */ + + (void)wrmsrl_safe(hwc->config_base, val); +} + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_p6_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static __initconst const struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = x86_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_counters = 2, + /* + * Events have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a event for P6-like PMU is 32 bits only. + * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .cntval_bits = 32, + .cntval_mask = (1ULL << 32) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = p6_event_constraints, + + .format_attrs = intel_p6_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + +}; + +static __init void p6_pmu_rdpmc_quirk(void) +{ + if (boot_cpu_data.x86_stepping < 9) { + /* + * PPro erratum 26; fixed in stepping 9 and above. + */ + pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n"); + x86_pmu.attr_rdpmc_broken = 1; + x86_pmu.attr_rdpmc = 0; + } +} + +__init int p6_pmu_init(void) +{ + x86_pmu = p6_pmu; + + switch (boot_cpu_data.x86_model) { + case 1: /* Pentium Pro */ + x86_add_quirk(p6_pmu_rdpmc_quirk); + break; + + case 3: /* Pentium II - Klamath */ + case 5: /* Pentium II - Deschutes */ + case 6: /* Pentium II - Mendocino */ + break; + + case 7: /* Pentium III - Katmai */ + case 8: /* Pentium III - Coppermine */ + case 10: /* Pentium III Xeon */ + case 11: /* Pentium III - Tualatin */ + break; + + case 9: /* Pentium M - Banias */ + case 13: /* Pentium M - Dothan */ + break; + + default: + pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); + return -ENODEV; + } + + memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c new file mode 100644 index 000000000..42a557940 --- /dev/null +++ b/arch/x86/events/intel/pt.c @@ -0,0 +1,1814 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel(R) Processor Trace PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + * + * Intel PT is specified in the Intel Architecture Instruction Set Extensions + * Programming Reference: + * http://software.intel.com/en-us/intel-isa-extensions + */ + +#undef DEBUG + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "../perf_event.h" +#include "pt.h" + +static DEFINE_PER_CPU(struct pt, pt_ctx); + +static struct pt_pmu pt_pmu; + +/* + * Capabilities of Intel PT hardware, such as number of address bits or + * supported output schemes, are cached and exported to userspace as "caps" + * attribute group of pt pmu device + * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store + * relevant bits together with intel_pt traces. + * + * These are necessary for both trace decoding (payloads_lip, contains address + * width encoded in IP-related packets), and event configuration (bitmasks with + * permitted values for certain bit fields). + */ +#define PT_CAP(_n, _l, _r, _m) \ + [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \ + .reg = _r, .mask = _m } + +static struct pt_cap_desc { + const char *name; + u32 leaf; + u8 reg; + u32 mask; +} pt_caps[] = { + PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff), + PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)), + PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)), + PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)), + PT_CAP(mtc, 0, CPUID_EBX, BIT(3)), + PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)), + PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)), + PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)), + PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)), + PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)), + PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)), + PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), + PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), + PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), + PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), + PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), + PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), +}; + +u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) +{ + struct pt_cap_desc *cd = &pt_caps[capability]; + u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg]; + unsigned int shift = __ffs(cd->mask); + + return (c & cd->mask) >> shift; +} +EXPORT_SYMBOL_GPL(intel_pt_validate_cap); + +u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) +{ + return intel_pt_validate_cap(pt_pmu.caps, cap); +} +EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap); + +static ssize_t pt_cap_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + enum pt_capabilities cap = (long)ea->var; + + return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap)); +} + +static struct attribute_group pt_cap_group __ro_after_init = { + .name = "caps", +}; + +PMU_FORMAT_ATTR(pt, "config:0" ); +PMU_FORMAT_ATTR(cyc, "config:1" ); +PMU_FORMAT_ATTR(pwr_evt, "config:4" ); +PMU_FORMAT_ATTR(fup_on_ptw, "config:5" ); +PMU_FORMAT_ATTR(mtc, "config:9" ); +PMU_FORMAT_ATTR(tsc, "config:10" ); +PMU_FORMAT_ATTR(noretcomp, "config:11" ); +PMU_FORMAT_ATTR(ptw, "config:12" ); +PMU_FORMAT_ATTR(branch, "config:13" ); +PMU_FORMAT_ATTR(event, "config:31" ); +PMU_FORMAT_ATTR(notnt, "config:55" ); +PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); +PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); +PMU_FORMAT_ATTR(psb_period, "config:24-27" ); + +static struct attribute *pt_formats_attr[] = { + &format_attr_pt.attr, + &format_attr_cyc.attr, + &format_attr_pwr_evt.attr, + &format_attr_event.attr, + &format_attr_notnt.attr, + &format_attr_fup_on_ptw.attr, + &format_attr_mtc.attr, + &format_attr_tsc.attr, + &format_attr_noretcomp.attr, + &format_attr_ptw.attr, + &format_attr_branch.attr, + &format_attr_mtc_period.attr, + &format_attr_cyc_thresh.attr, + &format_attr_psb_period.attr, + NULL, +}; + +static struct attribute_group pt_format_group = { + .name = "format", + .attrs = pt_formats_attr, +}; + +static ssize_t +pt_timing_attr_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + switch (pmu_attr->id) { + case 0: + return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio); + case 1: + return sprintf(page, "%u:%u\n", + pt_pmu.tsc_art_num, + pt_pmu.tsc_art_den); + default: + break; + } + + return -EINVAL; +} + +PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0, + pt_timing_attr_show); +PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1, + pt_timing_attr_show); + +static struct attribute *pt_timing_attr[] = { + &timing_attr_max_nonturbo_ratio.attr.attr, + &timing_attr_tsc_art_ratio.attr.attr, + NULL, +}; + +static struct attribute_group pt_timing_group = { + .attrs = pt_timing_attr, +}; + +static const struct attribute_group *pt_attr_groups[] = { + &pt_cap_group, + &pt_format_group, + &pt_timing_group, + NULL, +}; + +static int __init pt_pmu_hw_init(void) +{ + struct dev_ext_attribute *de_attrs; + struct attribute **attrs; + size_t size; + u64 reg; + int ret; + long i; + + rdmsrl(MSR_PLATFORM_INFO, reg); + pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8; + + /* + * if available, read in TSC to core crystal clock ratio, + * otherwise, zero for numerator stands for "not enumerated" + * as per SDM + */ + if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) { + u32 eax, ebx, ecx, edx; + + cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx); + + pt_pmu.tsc_art_num = ebx; + pt_pmu.tsc_art_den = eax; + } + + /* model-specific quirks */ + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: + case INTEL_FAM6_BROADWELL_X: + /* not setting BRANCH_EN will #GP, erratum BDM106 */ + pt_pmu.branch_en_always_on = true; + break; + default: + break; + } + + if (boot_cpu_has(X86_FEATURE_VMX)) { + /* + * Intel SDM, 36.5 "Tracing post-VMXON" says that + * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace + * post-VMXON. + */ + rdmsrl(MSR_IA32_VMX_MISC, reg); + if (reg & BIT(14)) + pt_pmu.vmx = true; + } + + for (i = 0; i < PT_CPUID_LEAVES; i++) { + cpuid_count(20, i, + &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]); + } + + ret = -ENOMEM; + size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1); + attrs = kzalloc(size, GFP_KERNEL); + if (!attrs) + goto fail; + + size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1); + de_attrs = kzalloc(size, GFP_KERNEL); + if (!de_attrs) + goto fail; + + for (i = 0; i < ARRAY_SIZE(pt_caps); i++) { + struct dev_ext_attribute *de_attr = de_attrs + i; + + de_attr->attr.attr.name = pt_caps[i].name; + + sysfs_attr_init(&de_attr->attr.attr); + + de_attr->attr.attr.mode = S_IRUGO; + de_attr->attr.show = pt_cap_show; + de_attr->var = (void *)i; + + attrs[i] = &de_attr->attr.attr; + } + + pt_cap_group.attrs = attrs; + + return 0; + +fail: + kfree(attrs); + + return ret; +} + +#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \ + RTIT_CTL_CYC_THRESH | \ + RTIT_CTL_PSB_FREQ) + +#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \ + RTIT_CTL_MTC_RANGE) + +#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \ + RTIT_CTL_FUP_ON_PTW) + +/* + * Bit 0 (TraceEn) in the attr.config is meaningless as the + * corresponding bit in the RTIT_CTL can only be controlled + * by the driver; therefore, repurpose it to mean: pass + * through the bit that was previously assumed to be always + * on for PT, thereby allowing the user to *not* set it if + * they so wish. See also pt_event_valid() and pt_config(). + */ +#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN + +#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \ + RTIT_CTL_TSC_EN | \ + RTIT_CTL_DISRETC | \ + RTIT_CTL_BRANCH_EN | \ + RTIT_CTL_CYC_PSB | \ + RTIT_CTL_MTC | \ + RTIT_CTL_PWR_EVT_EN | \ + RTIT_CTL_EVENT_EN | \ + RTIT_CTL_NOTNT | \ + RTIT_CTL_FUP_ON_PTW | \ + RTIT_CTL_PTW_EN) + +static bool pt_event_valid(struct perf_event *event) +{ + u64 config = event->attr.config; + u64 allowed, requested; + + if ((config & PT_CONFIG_MASK) != config) + return false; + + if (config & RTIT_CTL_CYC_PSB) { + if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc)) + return false; + + allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods); + requested = (config & RTIT_CTL_PSB_FREQ) >> + RTIT_CTL_PSB_FREQ_OFFSET; + if (requested && (!(allowed & BIT(requested)))) + return false; + + allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds); + requested = (config & RTIT_CTL_CYC_THRESH) >> + RTIT_CTL_CYC_THRESH_OFFSET; + if (requested && (!(allowed & BIT(requested)))) + return false; + } + + if (config & RTIT_CTL_MTC) { + /* + * In the unlikely case that CPUID lists valid mtc periods, + * but not the mtc capability, drop out here. + * + * Spec says that setting mtc period bits while mtc bit in + * CPUID is 0 will #GP, so better safe than sorry. + */ + if (!intel_pt_validate_hw_cap(PT_CAP_mtc)) + return false; + + allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods); + if (!allowed) + return false; + + requested = (config & RTIT_CTL_MTC_RANGE) >> + RTIT_CTL_MTC_RANGE_OFFSET; + + if (!(allowed & BIT(requested))) + return false; + } + + if (config & RTIT_CTL_PWR_EVT_EN && + !intel_pt_validate_hw_cap(PT_CAP_power_event_trace)) + return false; + + if (config & RTIT_CTL_EVENT_EN && + !intel_pt_validate_hw_cap(PT_CAP_event_trace)) + return false; + + if (config & RTIT_CTL_NOTNT && + !intel_pt_validate_hw_cap(PT_CAP_tnt_disable)) + return false; + + if (config & RTIT_CTL_PTW) { + if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite)) + return false; + + /* FUPonPTW without PTW doesn't make sense */ + if ((config & RTIT_CTL_FUP_ON_PTW) && + !(config & RTIT_CTL_PTW_EN)) + return false; + } + + /* + * Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config + * clears the assumption that BranchEn must always be enabled, + * as was the case with the first implementation of PT. + * If this bit is not set, the legacy behavior is preserved + * for compatibility with the older userspace. + * + * Re-using bit 0 for this purpose is fine because it is never + * directly set by the user; previous attempts at setting it in + * the attr.config resulted in -EINVAL. + */ + if (config & RTIT_CTL_PASSTHROUGH) { + /* + * Disallow not setting BRANCH_EN where BRANCH_EN is + * always required. + */ + if (pt_pmu.branch_en_always_on && + !(config & RTIT_CTL_BRANCH_EN)) + return false; + } else { + /* + * Disallow BRANCH_EN without the PASSTHROUGH. + */ + if (config & RTIT_CTL_BRANCH_EN) + return false; + } + + return true; +} + +/* + * PT configuration helpers + * These all are cpu affine and operate on a local PT + */ + +static void pt_config_start(struct perf_event *event) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + u64 ctl = event->hw.config; + + ctl |= RTIT_CTL_TRACEEN; + if (READ_ONCE(pt->vmx_on)) + perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); + else + wrmsrl(MSR_IA32_RTIT_CTL, ctl); + + WRITE_ONCE(event->hw.config, ctl); +} + +/* Address ranges and their corresponding msr configuration registers */ +static const struct pt_address_range { + unsigned long msr_a; + unsigned long msr_b; + unsigned int reg_off; +} pt_address_ranges[] = { + { + .msr_a = MSR_IA32_RTIT_ADDR0_A, + .msr_b = MSR_IA32_RTIT_ADDR0_B, + .reg_off = RTIT_CTL_ADDR0_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR1_A, + .msr_b = MSR_IA32_RTIT_ADDR1_B, + .reg_off = RTIT_CTL_ADDR1_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR2_A, + .msr_b = MSR_IA32_RTIT_ADDR2_B, + .reg_off = RTIT_CTL_ADDR2_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR3_A, + .msr_b = MSR_IA32_RTIT_ADDR3_B, + .reg_off = RTIT_CTL_ADDR3_OFFSET, + } +}; + +static u64 pt_config_filters(struct perf_event *event) +{ + struct pt_filters *filters = event->hw.addr_filters; + struct pt *pt = this_cpu_ptr(&pt_ctx); + unsigned int range = 0; + u64 rtit_ctl = 0; + + if (!filters) + return 0; + + perf_event_addr_filters_sync(event); + + for (range = 0; range < filters->nr_filters; range++) { + struct pt_filter *filter = &filters->filter[range]; + + /* + * Note, if the range has zero start/end addresses due + * to its dynamic object not being loaded yet, we just + * go ahead and program zeroed range, which will simply + * produce no data. Note^2: if executable code at 0x0 + * is a concern, we can set up an "invalid" configuration + * such as msr_b < msr_a. + */ + + /* avoid redundant msr writes */ + if (pt->filters.filter[range].msr_a != filter->msr_a) { + wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a); + pt->filters.filter[range].msr_a = filter->msr_a; + } + + if (pt->filters.filter[range].msr_b != filter->msr_b) { + wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b); + pt->filters.filter[range].msr_b = filter->msr_b; + } + + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; + } + + return rtit_ctl; +} + +static void pt_config(struct perf_event *event) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf = perf_get_aux(&pt->handle); + u64 reg; + + /* First round: clear STATUS, in particular the PSB byte counter. */ + if (!event->hw.config) { + perf_event_itrace_started(event); + wrmsrl(MSR_IA32_RTIT_STATUS, 0); + } + + reg = pt_config_filters(event); + reg |= RTIT_CTL_TRACEEN; + if (!buf->single) + reg |= RTIT_CTL_TOPA; + + /* + * Previously, we had BRANCH_EN on by default, but now that PT has + * grown features outside of branch tracing, it is useful to allow + * the user to disable it. Setting bit 0 in the event's attr.config + * allows BRANCH_EN to pass through instead of being always on. See + * also the comment in pt_event_valid(). + */ + if (event->attr.config & BIT(0)) { + reg |= event->attr.config & RTIT_CTL_BRANCH_EN; + } else { + reg |= RTIT_CTL_BRANCH_EN; + } + + if (!event->attr.exclude_kernel) + reg |= RTIT_CTL_OS; + if (!event->attr.exclude_user) + reg |= RTIT_CTL_USR; + + reg |= (event->attr.config & PT_CONFIG_MASK); + + event->hw.config = reg; + pt_config_start(event); +} + +static void pt_config_stop(struct perf_event *event) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + u64 ctl = READ_ONCE(event->hw.config); + + /* may be already stopped by a PMI */ + if (!(ctl & RTIT_CTL_TRACEEN)) + return; + + ctl &= ~RTIT_CTL_TRACEEN; + if (!READ_ONCE(pt->vmx_on)) + wrmsrl(MSR_IA32_RTIT_CTL, ctl); + + WRITE_ONCE(event->hw.config, ctl); + + /* + * A wrmsr that disables trace generation serializes other PT + * registers and causes all data packets to be written to memory, + * but a fence is required for the data to become globally visible. + * + * The below WMB, separating data store and aux_head store matches + * the consumer's RMB that separates aux_head load and data load. + */ + wmb(); +} + +/** + * struct topa - ToPA metadata + * @list: linkage to struct pt_buffer's list of tables + * @offset: offset of the first entry in this table in the buffer + * @size: total size of all entries in this table + * @last: index of the last initialized entry in this table + * @z_count: how many times the first entry repeats + */ +struct topa { + struct list_head list; + u64 offset; + size_t size; + int last; + unsigned int z_count; +}; + +/* + * Keep ToPA table-related metadata on the same page as the actual table, + * taking up a few words from the top + */ + +#define TENTS_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry)) + +/** + * struct topa_page - page-sized ToPA table with metadata at the top + * @table: actual ToPA table entries, as understood by PT hardware + * @topa: metadata + */ +struct topa_page { + struct topa_entry table[TENTS_PER_PAGE]; + struct topa topa; +}; + +static inline struct topa_page *topa_to_page(struct topa *topa) +{ + return container_of(topa, struct topa_page, topa); +} + +static inline struct topa_page *topa_entry_to_page(struct topa_entry *te) +{ + return (struct topa_page *)((unsigned long)te & PAGE_MASK); +} + +static inline phys_addr_t topa_pfn(struct topa *topa) +{ + return PFN_DOWN(virt_to_phys(topa_to_page(topa))); +} + +/* make -1 stand for the last table entry */ +#define TOPA_ENTRY(t, i) \ + ((i) == -1 \ + ? &topa_to_page(t)->table[(t)->last] \ + : &topa_to_page(t)->table[(i)]) +#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size)) +#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size) + +static void pt_config_buffer(struct pt_buffer *buf) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + u64 reg, mask; + void *base; + + if (buf->single) { + base = buf->data_pages[0]; + mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7; + } else { + base = topa_to_page(buf->cur)->table; + mask = (u64)buf->cur_idx; + } + + reg = virt_to_phys(base); + if (pt->output_base != reg) { + pt->output_base = reg; + wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg); + } + + reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32); + if (pt->output_mask != reg) { + pt->output_mask = reg; + wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); + } +} + +/** + * topa_alloc() - allocate page-sized ToPA table + * @cpu: CPU on which to allocate. + * @gfp: Allocation flags. + * + * Return: On success, return the pointer to ToPA table page. + */ +static struct topa *topa_alloc(int cpu, gfp_t gfp) +{ + int node = cpu_to_node(cpu); + struct topa_page *tp; + struct page *p; + + p = alloc_pages_node(node, gfp | __GFP_ZERO, 0); + if (!p) + return NULL; + + tp = page_address(p); + tp->topa.last = 0; + + /* + * In case of singe-entry ToPA, always put the self-referencing END + * link as the 2nd entry in the table + */ + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT; + TOPA_ENTRY(&tp->topa, 1)->end = 1; + } + + return &tp->topa; +} + +/** + * topa_free() - free a page-sized ToPA table + * @topa: Table to deallocate. + */ +static void topa_free(struct topa *topa) +{ + free_page((unsigned long)topa); +} + +/** + * topa_insert_table() - insert a ToPA table into a buffer + * @buf: PT buffer that's being extended. + * @topa: New topa table to be inserted. + * + * If it's the first table in this buffer, set up buffer's pointers + * accordingly; otherwise, add a END=1 link entry to @topa to the current + * "last" table and adjust the last table pointer to @topa. + */ +static void topa_insert_table(struct pt_buffer *buf, struct topa *topa) +{ + struct topa *last = buf->last; + + list_add_tail(&topa->list, &buf->tables); + + if (!buf->first) { + buf->first = buf->last = buf->cur = topa; + return; + } + + topa->offset = last->offset + last->size; + buf->last = topa; + + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) + return; + + BUG_ON(last->last != TENTS_PER_PAGE - 1); + + TOPA_ENTRY(last, -1)->base = topa_pfn(topa); + TOPA_ENTRY(last, -1)->end = 1; +} + +/** + * topa_table_full() - check if a ToPA table is filled up + * @topa: ToPA table. + */ +static bool topa_table_full(struct topa *topa) +{ + /* single-entry ToPA is a special case */ + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) + return !!topa->last; + + return topa->last == TENTS_PER_PAGE - 1; +} + +/** + * topa_insert_pages() - create a list of ToPA tables + * @buf: PT buffer being initialized. + * @gfp: Allocation flags. + * + * This initializes a list of ToPA tables with entries from + * the data_pages provided by rb_alloc_aux(). + * + * Return: 0 on success or error code. + */ +static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp) +{ + struct topa *topa = buf->last; + int order = 0; + struct page *p; + + p = virt_to_page(buf->data_pages[buf->nr_pages]); + if (PagePrivate(p)) + order = page_private(p); + + if (topa_table_full(topa)) { + topa = topa_alloc(cpu, gfp); + if (!topa) + return -ENOMEM; + + topa_insert_table(buf, topa); + } + + if (topa->z_count == topa->last - 1) { + if (order == TOPA_ENTRY(topa, topa->last - 1)->size) + topa->z_count++; + } + + TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT; + TOPA_ENTRY(topa, -1)->size = order; + if (!buf->snapshot && + !intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(topa, -1)->intr = 1; + TOPA_ENTRY(topa, -1)->stop = 1; + } + + topa->last++; + topa->size += sizes(order); + + buf->nr_pages += 1ul << order; + + return 0; +} + +/** + * pt_topa_dump() - print ToPA tables and their entries + * @buf: PT buffer. + */ +static void pt_topa_dump(struct pt_buffer *buf) +{ + struct topa *topa; + + list_for_each_entry(topa, &buf->tables, list) { + struct topa_page *tp = topa_to_page(topa); + int i; + + pr_debug("# table @%p, off %llx size %zx\n", tp->table, + topa->offset, topa->size); + for (i = 0; i < TENTS_PER_PAGE; i++) { + pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n", + &tp->table[i], + (unsigned long)tp->table[i].base << TOPA_SHIFT, + sizes(tp->table[i].size), + tp->table[i].end ? 'E' : ' ', + tp->table[i].intr ? 'I' : ' ', + tp->table[i].stop ? 'S' : ' ', + *(u64 *)&tp->table[i]); + if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) && + tp->table[i].stop) || + tp->table[i].end) + break; + if (!i && topa->z_count) + i += topa->z_count; + } + } +} + +/** + * pt_buffer_advance() - advance to the next output region + * @buf: PT buffer. + * + * Advance the current pointers in the buffer to the next ToPA entry. + */ +static void pt_buffer_advance(struct pt_buffer *buf) +{ + buf->output_off = 0; + buf->cur_idx++; + + if (buf->cur_idx == buf->cur->last) { + if (buf->cur == buf->last) + buf->cur = buf->first; + else + buf->cur = list_entry(buf->cur->list.next, struct topa, + list); + buf->cur_idx = 0; + } +} + +/** + * pt_update_head() - calculate current offsets and sizes + * @pt: Per-cpu pt context. + * + * Update buffer's current write pointer position and data size. + */ +static void pt_update_head(struct pt *pt) +{ + struct pt_buffer *buf = perf_get_aux(&pt->handle); + u64 topa_idx, base, old; + + if (buf->single) { + local_set(&buf->data_size, buf->output_off); + return; + } + + /* offset of the first region in this table from the beginning of buf */ + base = buf->cur->offset + buf->output_off; + + /* offset of the current output region within this table */ + for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++) + base += TOPA_ENTRY_SIZE(buf->cur, topa_idx); + + if (buf->snapshot) { + local_set(&buf->data_size, base); + } else { + old = (local64_xchg(&buf->head, base) & + ((buf->nr_pages << PAGE_SHIFT) - 1)); + if (base < old) + base += buf->nr_pages << PAGE_SHIFT; + + local_add(base - old, &buf->data_size); + } +} + +/** + * pt_buffer_region() - obtain current output region's address + * @buf: PT buffer. + */ +static void *pt_buffer_region(struct pt_buffer *buf) +{ + return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); +} + +/** + * pt_buffer_region_size() - obtain current output region's size + * @buf: PT buffer. + */ +static size_t pt_buffer_region_size(struct pt_buffer *buf) +{ + return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx); +} + +/** + * pt_handle_status() - take care of possible status conditions + * @pt: Per-cpu pt context. + */ +static void pt_handle_status(struct pt *pt) +{ + struct pt_buffer *buf = perf_get_aux(&pt->handle); + int advance = 0; + u64 status; + + rdmsrl(MSR_IA32_RTIT_STATUS, status); + + if (status & RTIT_STATUS_ERROR) { + pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n"); + pt_topa_dump(buf); + status &= ~RTIT_STATUS_ERROR; + } + + if (status & RTIT_STATUS_STOPPED) { + status &= ~RTIT_STATUS_STOPPED; + + /* + * On systems that only do single-entry ToPA, hitting STOP + * means we are already losing data; need to let the decoder + * know. + */ + if (!buf->single && + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || + buf->output_off == pt_buffer_region_size(buf))) { + perf_aux_output_flag(&pt->handle, + PERF_AUX_FLAG_TRUNCATED); + advance++; + } + } + + /* + * Also on single-entry ToPA implementations, interrupt will come + * before the output reaches its output region's boundary. + */ + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) && + !buf->snapshot && + pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) { + void *head = pt_buffer_region(buf); + + /* everything within this margin needs to be zeroed out */ + memset(head + buf->output_off, 0, + pt_buffer_region_size(buf) - + buf->output_off); + advance++; + } + + if (advance) + pt_buffer_advance(buf); + + wrmsrl(MSR_IA32_RTIT_STATUS, status); +} + +/** + * pt_read_offset() - translate registers into buffer pointers + * @buf: PT buffer. + * + * Set buffer's output pointers from MSR values. + */ +static void pt_read_offset(struct pt_buffer *buf) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct topa_page *tp; + + if (!buf->single) { + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); + tp = phys_to_virt(pt->output_base); + buf->cur = &tp->topa; + } + + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); + /* offset within current output region */ + buf->output_off = pt->output_mask >> 32; + /* index of current output region within this table */ + if (!buf->single) + buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7; +} + +static struct topa_entry * +pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg) +{ + struct topa_page *tp; + struct topa *topa; + unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0; + + /* + * Indicates a bug in the caller. + */ + if (WARN_ON_ONCE(pg >= buf->nr_pages)) + return NULL; + + /* + * First, find the ToPA table where @pg fits. With high + * order allocations, there shouldn't be many of these. + */ + list_for_each_entry(topa, &buf->tables, list) { + if (topa->offset + topa->size > pg << PAGE_SHIFT) + goto found; + } + + /* + * Hitting this means we have a problem in the ToPA + * allocation code. + */ + WARN_ON_ONCE(1); + + return NULL; + +found: + /* + * Indicates a problem in the ToPA allocation code. + */ + if (WARN_ON_ONCE(topa->last == -1)) + return NULL; + + tp = topa_to_page(topa); + cur_pg = PFN_DOWN(topa->offset); + if (topa->z_count) { + z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1); + start_idx = topa->z_count + 1; + } + + /* + * Multiple entries at the beginning of the table have the same size, + * ideally all of them; if @pg falls there, the search is done. + */ + if (pg >= cur_pg && pg < cur_pg + z_pg) { + idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0); + return &tp->table[idx]; + } + + /* + * Otherwise, slow path: iterate through the remaining entries. + */ + for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) { + if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg) + return &tp->table[idx]; + + cur_pg += TOPA_ENTRY_PAGES(topa, idx); + } + + /* + * Means we couldn't find a ToPA entry in the table that does match. + */ + WARN_ON_ONCE(1); + + return NULL; +} + +static struct topa_entry * +pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te) +{ + unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1); + struct topa_page *tp; + struct topa *topa; + + tp = (struct topa_page *)table; + if (tp->table != te) + return --te; + + topa = &tp->topa; + if (topa == buf->first) + topa = buf->last; + else + topa = list_prev_entry(topa, list); + + tp = topa_to_page(topa); + + return &tp->table[topa->last - 1]; +} + +/** + * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer + * @buf: PT buffer. + * @handle: Current output handle. + * + * Place INT and STOP marks to prevent overwriting old data that the consumer + * hasn't yet collected and waking up the consumer after a certain fraction of + * the buffer has filled up. Only needed and sensible for non-snapshot counters. + * + * This obviously relies on buf::head to figure out buffer markers, so it has + * to be called after pt_buffer_reset_offsets() and before the hardware tracing + * is enabled. + */ +static int pt_buffer_reset_markers(struct pt_buffer *buf, + struct perf_output_handle *handle) + +{ + unsigned long head = local64_read(&buf->head); + unsigned long idx, npages, wakeup; + + if (buf->single) + return 0; + + /* can't stop in the middle of an output region */ + if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) { + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + return -EINVAL; + } + + + /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) + return 0; + + /* clear STOP and INT from current entry */ + if (buf->stop_te) { + buf->stop_te->stop = 0; + buf->stop_te->intr = 0; + } + + if (buf->intr_te) + buf->intr_te->intr = 0; + + /* how many pages till the STOP marker */ + npages = handle->size >> PAGE_SHIFT; + + /* if it's on a page boundary, fill up one more page */ + if (!offset_in_page(head + handle->size + 1)) + npages++; + + idx = (head >> PAGE_SHIFT) + npages; + idx &= buf->nr_pages - 1; + + if (idx != buf->stop_pos) { + buf->stop_pos = idx; + buf->stop_te = pt_topa_entry_for_page(buf, idx); + buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te); + } + + wakeup = handle->wakeup >> PAGE_SHIFT; + + /* in the worst case, wake up the consumer one page before hard stop */ + idx = (head >> PAGE_SHIFT) + npages - 1; + if (idx > wakeup) + idx = wakeup; + + idx &= buf->nr_pages - 1; + if (idx != buf->intr_pos) { + buf->intr_pos = idx; + buf->intr_te = pt_topa_entry_for_page(buf, idx); + buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te); + } + + buf->stop_te->stop = 1; + buf->stop_te->intr = 1; + buf->intr_te->intr = 1; + + return 0; +} + +/** + * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head + * @buf: PT buffer. + * @head: Write pointer (aux_head) from AUX buffer. + * + * Find the ToPA table and entry corresponding to given @head and set buffer's + * "current" pointers accordingly. This is done after we have obtained the + * current aux_head position from a successful call to perf_aux_output_begin() + * to make sure the hardware is writing to the right place. + * + * This function modifies buf::{cur,cur_idx,output_off} that will be programmed + * into PT msrs when the tracing is enabled and buf::head and buf::data_size, + * which are used to determine INT and STOP markers' locations by a subsequent + * call to pt_buffer_reset_markers(). + */ +static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) +{ + struct topa_page *cur_tp; + struct topa_entry *te; + int pg; + + if (buf->snapshot) + head &= (buf->nr_pages << PAGE_SHIFT) - 1; + + if (!buf->single) { + pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1); + te = pt_topa_entry_for_page(buf, pg); + + cur_tp = topa_entry_to_page(te); + buf->cur = &cur_tp->topa; + buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0); + buf->output_off = head & (pt_buffer_region_size(buf) - 1); + } else { + buf->output_off = head; + } + + local64_set(&buf->head, head); + local_set(&buf->data_size, 0); +} + +/** + * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer + * @buf: PT buffer. + */ +static void pt_buffer_fini_topa(struct pt_buffer *buf) +{ + struct topa *topa, *iter; + + if (buf->single) + return; + + list_for_each_entry_safe(topa, iter, &buf->tables, list) { + /* + * right now, this is in free_aux() path only, so + * no need to unlink this table from the list + */ + topa_free(topa); + } +} + +/** + * pt_buffer_init_topa() - initialize ToPA table for pt buffer + * @buf: PT buffer. + * @size: Total size of all regions within this ToPA. + * @gfp: Allocation flags. + */ +static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu, + unsigned long nr_pages, gfp_t gfp) +{ + struct topa *topa; + int err; + + topa = topa_alloc(cpu, gfp); + if (!topa) + return -ENOMEM; + + topa_insert_table(buf, topa); + + while (buf->nr_pages < nr_pages) { + err = topa_insert_pages(buf, cpu, gfp); + if (err) { + pt_buffer_fini_topa(buf); + return -ENOMEM; + } + } + + /* link last table to the first one, unless we're double buffering */ + if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first); + TOPA_ENTRY(buf->last, -1)->end = 1; + } + + pt_topa_dump(buf); + return 0; +} + +static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) +{ + struct page *p = virt_to_page(buf->data_pages[0]); + int ret = -ENOTSUPP, order = 0; + + /* + * We can use single range output mode + * + in snapshot mode, where we don't need interrupts; + * + if the hardware supports it; + * + if the entire buffer is one contiguous allocation. + */ + if (!buf->snapshot) + goto out; + + if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output)) + goto out; + + if (PagePrivate(p)) + order = page_private(p); + + if (1 << order != nr_pages) + goto out; + + /* + * Some processors cannot always support single range for more than + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might + * also be affected, so for now rather than trying to keep track of + * which ones, just disable it for all. + */ + if (nr_pages > 1) + goto out; + + buf->single = true; + buf->nr_pages = nr_pages; + ret = 0; +out: + return ret; +} + +/** + * pt_buffer_setup_aux() - set up topa tables for a PT buffer + * @cpu: Cpu on which to allocate, -1 means current. + * @pages: Array of pointers to buffer pages passed from perf core. + * @nr_pages: Number of pages in the buffer. + * @snapshot: If this is a snapshot/overwrite counter. + * + * This is a pmu::setup_aux callback that sets up ToPA tables and all the + * bookkeeping for an AUX buffer. + * + * Return: Our private PT buffer structure. + */ +static void * +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) +{ + struct pt_buffer *buf; + int node, ret, cpu = event->cpu; + + if (!nr_pages) + return NULL; + + /* + * Only support AUX sampling in snapshot mode, where we don't + * generate NMIs. + */ + if (event->attr.aux_sample_size && !snapshot) + return NULL; + + if (cpu == -1) + cpu = raw_smp_processor_id(); + node = cpu_to_node(cpu); + + buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->snapshot = snapshot; + buf->data_pages = pages; + buf->stop_pos = -1; + buf->intr_pos = -1; + + INIT_LIST_HEAD(&buf->tables); + + ret = pt_buffer_try_single(buf, nr_pages); + if (!ret) + return buf; + + ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL); + if (ret) { + kfree(buf); + return NULL; + } + + return buf; +} + +/** + * pt_buffer_free_aux() - perf AUX deallocation path callback + * @data: PT buffer. + */ +static void pt_buffer_free_aux(void *data) +{ + struct pt_buffer *buf = data; + + pt_buffer_fini_topa(buf); + kfree(buf); +} + +static int pt_addr_filters_init(struct perf_event *event) +{ + struct pt_filters *filters; + int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); + + if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) + return 0; + + filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node); + if (!filters) + return -ENOMEM; + + if (event->parent) + memcpy(filters, event->parent->hw.addr_filters, + sizeof(*filters)); + + event->hw.addr_filters = filters; + + return 0; +} + +static void pt_addr_filters_fini(struct perf_event *event) +{ + kfree(event->hw.addr_filters); + event->hw.addr_filters = NULL; +} + +#ifdef CONFIG_X86_64 +/* Clamp to a canonical address greater-than-or-equal-to the address given */ +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + -BIT_ULL(vaddr_bits - 1); +} + +/* Clamp to a canonical address less-than-or-equal-to the address given */ +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + BIT_ULL(vaddr_bits - 1) - 1; +} +#else +#define clamp_to_ge_canonical_addr(x, y) (x) +#define clamp_to_le_canonical_addr(x, y) (x) +#endif + +static int pt_event_addr_filters_validate(struct list_head *filters) +{ + struct perf_addr_filter *filter; + int range = 0; + + list_for_each_entry(filter, filters, entry) { + /* + * PT doesn't support single address triggers and + * 'start' filters. + */ + if (!filter->size || + filter->action == PERF_ADDR_FILTER_ACTION_START) + return -EOPNOTSUPP; + + if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) + return -EOPNOTSUPP; + } + + return 0; +} + +static void pt_event_addr_filters_sync(struct perf_event *event) +{ + struct perf_addr_filters_head *head = perf_event_addr_filters(event); + unsigned long msr_a, msr_b; + struct perf_addr_filter_range *fr = event->addr_filter_ranges; + struct pt_filters *filters = event->hw.addr_filters; + struct perf_addr_filter *filter; + int range = 0; + + if (!filters) + return; + + list_for_each_entry(filter, &head->list, entry) { + if (filter->path.dentry && !fr[range].start) { + msr_a = msr_b = 0; + } else { + unsigned long n = fr[range].size - 1; + unsigned long a = fr[range].start; + unsigned long b; + + if (a > ULONG_MAX - n) + b = ULONG_MAX; + else + b = a + n; + /* + * Apply the offset. 64-bit addresses written to the + * MSRs must be canonical, but the range can encompass + * non-canonical addresses. Since software cannot + * execute at non-canonical addresses, adjusting to + * canonical addresses does not affect the result of the + * address filter. + */ + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits); + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits); + if (msr_b < msr_a) + msr_a = msr_b = 0; + } + + filters->filter[range].msr_a = msr_a; + filters->filter[range].msr_b = msr_b; + if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER) + filters->filter[range].config = 1; + else + filters->filter[range].config = 2; + range++; + } + + filters->nr_filters = range; +} + +/** + * intel_pt_interrupt() - PT PMI handler + */ +void intel_pt_interrupt(void) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf; + struct perf_event *event = pt->handle.event; + + /* + * There may be a dangling PT bit in the interrupt status register + * after PT has been disabled by pt_event_stop(). Make sure we don't + * do anything (particularly, re-enable) for this event here. + */ + if (!READ_ONCE(pt->handle_nmi)) + return; + + if (!event) + return; + + pt_config_stop(event); + + buf = perf_get_aux(&pt->handle); + if (!buf) + return; + + pt_read_offset(buf); + + pt_handle_status(pt); + + pt_update_head(pt); + + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); + + if (!event->hw.state) { + int ret; + + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) { + event->hw.state = PERF_HES_STOPPED; + return; + } + + pt_buffer_reset_offsets(buf, pt->handle.head); + /* snapshot counters don't use PMI, so it's safe */ + ret = pt_buffer_reset_markers(buf, &pt->handle); + if (ret) { + perf_aux_output_end(&pt->handle, 0); + return; + } + + pt_config_buffer(buf); + pt_config_start(event); + } +} + +void intel_pt_handle_vmx(int on) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct perf_event *event; + unsigned long flags; + + /* PT plays nice with VMX, do nothing */ + if (pt_pmu.vmx) + return; + + /* + * VMXON will clear RTIT_CTL.TraceEn; we need to make + * sure to not try to set it while VMX is on. Disable + * interrupts to avoid racing with pmu callbacks; + * concurrent PMI should be handled fine. + */ + local_irq_save(flags); + WRITE_ONCE(pt->vmx_on, on); + + /* + * If an AUX transaction is in progress, it will contain + * gap(s), so flag it PARTIAL to inform the user. + */ + event = pt->handle.event; + if (event) + perf_aux_output_flag(&pt->handle, + PERF_AUX_FLAG_PARTIAL); + + /* Turn PTs back on */ + if (!on && event) + wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); + +/* + * PMU callbacks + */ + +static void pt_event_start(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf; + + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) + goto fail_stop; + + pt_buffer_reset_offsets(buf, pt->handle.head); + if (!buf->snapshot) { + if (pt_buffer_reset_markers(buf, &pt->handle)) + goto fail_end_stop; + } + + WRITE_ONCE(pt->handle_nmi, 1); + hwc->state = 0; + + pt_config_buffer(buf); + pt_config(event); + + return; + +fail_end_stop: + perf_aux_output_end(&pt->handle, 0); +fail_stop: + hwc->state = PERF_HES_STOPPED; +} + +static void pt_event_stop(struct perf_event *event, int mode) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + + /* + * Protect against the PMI racing with disabling wrmsr, + * see comment in intel_pt_interrupt(). + */ + WRITE_ONCE(pt->handle_nmi, 0); + + pt_config_stop(event); + + if (event->hw.state == PERF_HES_STOPPED) + return; + + event->hw.state = PERF_HES_STOPPED; + + if (mode & PERF_EF_UPDATE) { + struct pt_buffer *buf = perf_get_aux(&pt->handle); + + if (!buf) + return; + + if (WARN_ON_ONCE(pt->handle.event != event)) + return; + + pt_read_offset(buf); + + pt_handle_status(pt); + + pt_update_head(pt); + + if (buf->snapshot) + pt->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); + } +} + +static long pt_event_snapshot_aux(struct perf_event *event, + struct perf_output_handle *handle, + unsigned long size) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf = perf_get_aux(&pt->handle); + unsigned long from = 0, to; + long ret; + + if (WARN_ON_ONCE(!buf)) + return 0; + + /* + * Sampling is only allowed on snapshot events; + * see pt_buffer_setup_aux(). + */ + if (WARN_ON_ONCE(!buf->snapshot)) + return 0; + + /* + * Here, handle_nmi tells us if the tracing is on + */ + if (READ_ONCE(pt->handle_nmi)) + pt_config_stop(event); + + pt_read_offset(buf); + pt_update_head(pt); + + to = local_read(&buf->data_size); + if (to < size) + from = buf->nr_pages << PAGE_SHIFT; + from += to - size; + + ret = perf_output_copy_aux(&pt->handle, handle, from, to); + + /* + * If the tracing was on when we turned up, restart it. + * Compiler barrier not needed as we couldn't have been + * preempted by anything that touches pt->handle_nmi. + */ + if (pt->handle_nmi) + pt_config_start(event); + + return ret; +} + +static void pt_event_del(struct perf_event *event, int mode) +{ + pt_event_stop(event, PERF_EF_UPDATE); +} + +static int pt_event_add(struct perf_event *event, int mode) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct hw_perf_event *hwc = &event->hw; + int ret = -EBUSY; + + if (pt->handle.event) + goto fail; + + if (mode & PERF_EF_START) { + pt_event_start(event, 0); + ret = -EINVAL; + if (hwc->state == PERF_HES_STOPPED) + goto fail; + } else { + hwc->state = PERF_HES_STOPPED; + } + + ret = 0; +fail: + + return ret; +} + +static void pt_event_read(struct perf_event *event) +{ +} + +static void pt_event_destroy(struct perf_event *event) +{ + pt_addr_filters_fini(event); + x86_del_exclusive(x86_lbr_exclusive_pt); +} + +static int pt_event_init(struct perf_event *event) +{ + if (event->attr.type != pt_pmu.pmu.type) + return -ENOENT; + + if (!pt_event_valid(event)) + return -EINVAL; + + if (x86_add_exclusive(x86_lbr_exclusive_pt)) + return -EBUSY; + + if (pt_addr_filters_init(event)) { + x86_del_exclusive(x86_lbr_exclusive_pt); + return -ENOMEM; + } + + event->destroy = pt_event_destroy; + + return 0; +} + +void cpu_emergency_stop_pt(void) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + + if (pt->handle.event) + pt_event_stop(pt->handle.event, PERF_EF_UPDATE); +} + +int is_intel_pt_event(struct perf_event *event) +{ + return event->pmu == &pt_pmu.pmu; +} + +static __init int pt_init(void) +{ + int ret, cpu, prior_warn = 0; + + BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE); + + if (!boot_cpu_has(X86_FEATURE_INTEL_PT)) + return -ENODEV; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + u64 ctl; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); + if (!ret && (ctl & RTIT_CTL_TRACEEN)) + prior_warn++; + } + cpus_read_unlock(); + + if (prior_warn) { + x86_add_exclusive(x86_lbr_exclusive_pt); + pr_warn("PT is enabled at boot time, doing nothing\n"); + + return -EBUSY; + } + + ret = pt_pmu_hw_init(); + if (ret) + return ret; + + if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) { + pr_warn("ToPA output is not supported on this CPU\n"); + return -ENODEV; + } + + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; + + pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; + pt_pmu.pmu.attr_groups = pt_attr_groups; + pt_pmu.pmu.task_ctx_nr = perf_sw_context; + pt_pmu.pmu.event_init = pt_event_init; + pt_pmu.pmu.add = pt_event_add; + pt_pmu.pmu.del = pt_event_del; + pt_pmu.pmu.start = pt_event_start; + pt_pmu.pmu.stop = pt_event_stop; + pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux; + pt_pmu.pmu.read = pt_event_read; + pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; + pt_pmu.pmu.free_aux = pt_buffer_free_aux; + pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync; + pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate; + pt_pmu.pmu.nr_addr_filters = + intel_pt_validate_hw_cap(PT_CAP_num_address_ranges); + + ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1); + + return ret; +} +arch_initcall(pt_init); diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h new file mode 100644 index 000000000..96906a62a --- /dev/null +++ b/arch/x86/events/intel/pt.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel(R) Processor Trace PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + * + * Intel PT is specified in the Intel Architecture Instruction Set Extensions + * Programming Reference: + * http://software.intel.com/en-us/intel-isa-extensions + */ + +#ifndef __INTEL_PT_H__ +#define __INTEL_PT_H__ + +/* + * Single-entry ToPA: when this close to region boundary, switch + * buffers to avoid losing data. + */ +#define TOPA_PMI_MARGIN 512 + +#define TOPA_SHIFT 12 + +static inline unsigned int sizes(unsigned int tsz) +{ + return 1 << (tsz + TOPA_SHIFT); +}; + +struct topa_entry { + u64 end : 1; + u64 rsvd0 : 1; + u64 intr : 1; + u64 rsvd1 : 1; + u64 stop : 1; + u64 rsvd2 : 1; + u64 size : 4; + u64 rsvd3 : 2; + u64 base : 36; + u64 rsvd4 : 16; +}; + +/* TSC to Core Crystal Clock Ratio */ +#define CPUID_TSC_LEAF 0x15 + +struct pt_pmu { + struct pmu pmu; + u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; + bool vmx; + bool branch_en_always_on; + unsigned long max_nonturbo_ratio; + unsigned int tsc_art_num; + unsigned int tsc_art_den; +}; + +/** + * struct pt_buffer - buffer configuration; one buffer per task_struct or + * cpu, depending on perf event configuration + * @tables: list of ToPA tables in this buffer + * @first: shorthand for first topa table + * @last: shorthand for last topa table + * @cur: current topa table + * @nr_pages: buffer size in pages + * @cur_idx: current output region's index within @cur table + * @output_off: offset within the current output region + * @data_size: running total of the amount of data in this buffer + * @lost: if data was lost/truncated + * @head: logical write offset inside the buffer + * @snapshot: if this is for a snapshot/overwrite counter + * @single: use Single Range Output instead of ToPA + * @stop_pos: STOP topa entry index + * @intr_pos: INT topa entry index + * @stop_te: STOP topa entry pointer + * @intr_te: INT topa entry pointer + * @data_pages: array of pages from perf + * @topa_index: table of topa entries indexed by page offset + */ +struct pt_buffer { + struct list_head tables; + struct topa *first, *last, *cur; + unsigned int cur_idx; + size_t output_off; + unsigned long nr_pages; + local_t data_size; + local64_t head; + bool snapshot; + bool single; + long stop_pos, intr_pos; + struct topa_entry *stop_te, *intr_te; + void **data_pages; +}; + +#define PT_FILTERS_NUM 4 + +/** + * struct pt_filter - IP range filter configuration + * @msr_a: range start, goes to RTIT_ADDRn_A + * @msr_b: range end, goes to RTIT_ADDRn_B + * @config: 4-bit field in RTIT_CTL + */ +struct pt_filter { + unsigned long msr_a; + unsigned long msr_b; + unsigned long config; +}; + +/** + * struct pt_filters - IP range filtering context + * @filter: filters defined for this context + * @nr_filters: number of defined filters in the @filter array + */ +struct pt_filters { + struct pt_filter filter[PT_FILTERS_NUM]; + unsigned int nr_filters; +}; + +/** + * struct pt - per-cpu pt context + * @handle: perf output handle + * @filters: last configured filters + * @handle_nmi: do handle PT PMI on this cpu, there's an active event + * @vmx_on: 1 if VMX is ON on this cpu + * @output_base: cached RTIT_OUTPUT_BASE MSR value + * @output_mask: cached RTIT_OUTPUT_MASK MSR value + */ +struct pt { + struct perf_output_handle handle; + struct pt_filters filters; + int handle_nmi; + int vmx_on; + u64 output_base; + u64 output_mask; +}; + +#endif /* __INTEL_PT_H__ */ diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c new file mode 100644 index 000000000..27b34f5b8 --- /dev/null +++ b/arch/x86/events/intel/uncore.c @@ -0,0 +1,1923 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#include +#include +#include "uncore.h" +#include "uncore_discovery.h" + +static bool uncore_no_discover; +module_param(uncore_no_discover, bool, 0); +MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " + "(default: enable the discovery mechanism)."); +struct intel_uncore_type *empty_uncore[] = { NULL, }; +struct intel_uncore_type **uncore_msr_uncores = empty_uncore; +struct intel_uncore_type **uncore_pci_uncores = empty_uncore; +struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; + +static bool pcidrv_registered; +struct pci_driver *uncore_pci_driver; +/* The PCI driver for the device which the uncore doesn't own. */ +struct pci_driver *uncore_pci_sub_driver; +/* pci bus to socket mapping */ +DEFINE_RAW_SPINLOCK(pci2phy_map_lock); +struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); +struct pci_extra_dev *uncore_extra_pci_dev; +int __uncore_max_dies; + +/* mask of cpus that collect uncore events */ +static cpumask_t uncore_cpu_mask; + +/* constraint for the fixed counter */ +static struct event_constraint uncore_constraint_fixed = + EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); +struct event_constraint uncore_constraint_empty = + EVENT_CONSTRAINT(0, 0, 0); + +MODULE_LICENSE("GPL"); + +int uncore_pcibus_to_dieid(struct pci_bus *bus) +{ + struct pci2phy_map *map; + int die_id = -1; + + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == pci_domain_nr(bus)) { + die_id = map->pbus_to_dieid[bus->number]; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + return die_id; +} + +int uncore_die_to_segment(int die) +{ + struct pci_bus *bus = NULL; + + /* Find first pci bus which attributes to specified die. */ + while ((bus = pci_find_next_bus(bus)) && + (die != uncore_pcibus_to_dieid(bus))) + ; + + return bus ? pci_domain_nr(bus) : -EINVAL; +} + +static void uncore_free_pcibus_map(void) +{ + struct pci2phy_map *map, *tmp; + + list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { + list_del(&map->list); + kfree(map); + } +} + +struct pci2phy_map *__find_pci2phy_map(int segment) +{ + struct pci2phy_map *map, *alloc = NULL; + int i; + + lockdep_assert_held(&pci2phy_map_lock); + +lookup: + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == segment) + goto end; + } + + if (!alloc) { + raw_spin_unlock(&pci2phy_map_lock); + alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); + raw_spin_lock(&pci2phy_map_lock); + + if (!alloc) + return NULL; + + goto lookup; + } + + map = alloc; + alloc = NULL; + map->segment = segment; + for (i = 0; i < 256; i++) + map->pbus_to_dieid[i] = -1; + list_add_tail(&map->list, &pci2phy_map_head); + +end: + kfree(alloc); + return map; +} + +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uncore_event_desc *event = + container_of(attr, struct uncore_event_desc, attr); + return sprintf(buf, "%s", event->config); +} + +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +{ + unsigned int dieid = topology_logical_die_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL; +} + +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + u64 count; + + rdmsrl(event->hw.event_base, count); + + return count; +} + +void uncore_mmio_exit_box(struct intel_uncore_box *box) +{ + if (box->io_addr) + iounmap(box->io_addr); +} + +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (!box->io_addr) + return 0; + + if (!uncore_mmio_is_valid_offset(box, event->hw.event_base)) + return 0; + + return readq(box->io_addr + event->hw.event_base); +} + +/* + * generic get constraint function for shared match/mask registers. + */ +struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + unsigned long flags; + bool ok = false; + + /* + * reg->alloc can be set due to existing state, so for fake box we + * need to ignore this, otherwise we might fail to allocate proper + * fake state for this extra reg constraint. + */ + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; + + er = &box->shared_regs[reg1->idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || + (er->config1 == reg1->config && er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (ok) { + if (!uncore_box_is_fake(box)) + reg1->alloc = 1; + return NULL; + } + + return &uncore_constraint_empty; +} + +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + /* + * Only put constraint if extra reg was actually allocated. Also + * takes care of event which do not use an extra shared reg. + * + * Also, if this is a fake box we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent box + * state either since it will be thrown out. + */ + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + er = &box->shared_regs[reg1->idx]; + atomic_dec(&er->ref); + reg1->alloc = 0; +} + +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + er = &box->shared_regs[idx]; + + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + + return config; +} + +static void uncore_assign_hw_event(struct intel_uncore_box *box, + struct perf_event *event, int idx) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->last_tag = ++box->tags[idx]; + + if (uncore_pmc_fixed(hwc->idx)) { + hwc->event_base = uncore_fixed_ctr(box); + hwc->config_base = uncore_fixed_ctl(box); + return; + } + + hwc->config_base = uncore_event_ctl(box, hwc->idx); + hwc->event_base = uncore_perf_ctr(box, hwc->idx); +} + +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) +{ + u64 prev_count, new_count, delta; + int shift; + + if (uncore_pmc_freerunning(event->hw.idx)) + shift = 64 - uncore_freerunning_bits(box, event); + else if (uncore_pmc_fixed(event->hw.idx)) + shift = 64 - uncore_fixed_ctr_bits(box); + else + shift = 64 - uncore_perf_ctr_bits(box); + + /* the hrtimer might modify the previous event value */ +again: + prev_count = local64_read(&event->hw.prev_count); + new_count = uncore_read_counter(box, event); + if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) + goto again; + + delta = (new_count << shift) - (prev_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); +} + +/* + * The overflow interrupt is unavailable for SandyBridge-EP, is broken + * for SandyBridge. So we use hrtimer to periodically poll the counter + * to avoid overflow. + */ +static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) +{ + struct intel_uncore_box *box; + struct perf_event *event; + unsigned long flags; + int bit; + + box = container_of(hrtimer, struct intel_uncore_box, hrtimer); + if (!box->n_active || box->cpu != smp_processor_id()) + return HRTIMER_NORESTART; + /* + * disable local interrupt to prevent uncore_pmu_event_start/stop + * to interrupt the update process + */ + local_irq_save(flags); + + /* + * handle boxes with an active event list as opposed to active + * counters + */ + list_for_each_entry(event, &box->active_list, active_entry) { + uncore_perf_event_update(box, event); + } + + for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) + uncore_perf_event_update(box, box->events[bit]); + + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); + return HRTIMER_RESTART; +} + +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +{ + hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), + HRTIMER_MODE_REL_PINNED); +} + +void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +{ + hrtimer_cancel(&box->hrtimer); +} + +static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) +{ + hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + box->hrtimer.function = uncore_pmu_hrtimer; +} + +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, + int node) +{ + int i, size, numshared = type->num_shared_regs ; + struct intel_uncore_box *box; + + size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); + + box = kzalloc_node(size, GFP_KERNEL, node); + if (!box) + return NULL; + + for (i = 0; i < numshared; i++) + raw_spin_lock_init(&box->shared_regs[i].lock); + + uncore_pmu_init_hrtimer(box); + box->cpu = -1; + box->dieid = -1; + + /* set default hrtimer timeout */ + box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; + + INIT_LIST_HEAD(&box->active_list); + + return box; +} + +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. + */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) +{ + return &box->pmu->pmu == event->pmu; +} + +static int +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, + bool dogrp) +{ + struct perf_event *event; + int n, max_count; + + max_count = box->pmu->type->num_counters; + if (box->pmu->type->fixed_ctl) + max_count++; + + if (box->n_events >= max_count) + return -EINVAL; + + n = box->n_events; + + if (is_box_event(box, leader)) { + box->event_list[n] = leader; + n++; + } + + if (!dogrp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_box_event(box, event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + box->event_list[n] = event; + n++; + } + return n; +} + +static struct event_constraint * +uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct event_constraint *c; + + if (type->ops->get_constraint) { + c = type->ops->get_constraint(box, event); + if (c) + return c; + } + + if (event->attr.config == UNCORE_FIXED_EVENT) + return &uncore_constraint_fixed; + + if (type->constraints) { + for_each_event_constraint(c, type->constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &type->unconstrainted; +} + +static void uncore_put_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (box->pmu->type->ops->put_constraint) + box->pmu->type->ops->put_constraint(box, event); +} + +static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) +{ + unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; + struct event_constraint *c; + int i, wmin, wmax, ret = 0; + struct hw_perf_event *hwc; + + bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); + + for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { + c = uncore_get_event_constraint(box, box->event_list[i]); + box->event_constraint[i] = c; + wmin = min(wmin, c->weight); + wmax = max(wmax, c->weight); + } + + /* fastpath, try to reuse previous register */ + for (i = 0; i < n; i++) { + hwc = &box->event_list[i]->hw; + c = box->event_constraint[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c->idxmsk)) + break; + + /* not already used */ + if (test_bit(hwc->idx, used_mask)) + break; + + __set_bit(hwc->idx, used_mask); + if (assign) + assign[i] = hwc->idx; + } + /* slow path */ + if (i != n) + ret = perf_assign_events(box->event_constraint, n, + wmin, wmax, n, assign); + + if (!assign || ret) { + for (i = 0; i < n; i++) + uncore_put_event_constraint(box, box->event_list[i]); + } + return ret ? -EINVAL : 0; +} + +void uncore_pmu_event_start(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int idx = event->hw.idx; + + if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) + return; + + /* + * Free running counter is read-only and always active. + * Use the current counter value as start point. + * There is no overflow interrupt for free running counter. + * Use hrtimer to periodically poll the counter to avoid overflow. + */ + if (uncore_pmc_freerunning(event->hw.idx)) { + list_add_tail(&event->active_entry, &box->active_list); + local64_set(&event->hw.prev_count, + uncore_read_counter(box, event)); + if (box->n_active++ == 0) + uncore_pmu_start_hrtimer(box); + return; + } + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + box->events[idx] = event; + box->n_active++; + __set_bit(idx, box->active_mask); + + local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); + uncore_enable_event(box, event); + + if (box->n_active == 1) + uncore_pmu_start_hrtimer(box); +} + +void uncore_pmu_event_stop(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + /* Cannot disable free running counter which is read-only */ + if (uncore_pmc_freerunning(hwc->idx)) { + list_del(&event->active_entry); + if (--box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + uncore_perf_event_update(box, event); + return; + } + + if (__test_and_clear_bit(hwc->idx, box->active_mask)) { + uncore_disable_event(box, event); + box->n_active--; + box->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + uncore_perf_event_update(box, event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +int uncore_pmu_event_add(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + int assign[UNCORE_PMC_IDX_MAX]; + int i, n, ret; + + if (!box) + return -ENODEV; + + /* + * The free funning counter is assigned in event_init(). + * The free running counter event and free running counter + * are 1:1 mapped. It doesn't need to be tracked in event_list. + */ + if (uncore_pmc_freerunning(hwc->idx)) { + if (flags & PERF_EF_START) + uncore_pmu_event_start(event, 0); + return 0; + } + + ret = n = uncore_collect_events(box, event, false); + if (ret < 0) + return ret; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + ret = uncore_assign_events(box, assign, n); + if (ret) + return ret; + + /* save events moving to new counters */ + for (i = 0; i < box->n_events; i++) { + event = box->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == assign[i] && + hwc->last_tag == box->tags[assign[i]]) + continue; + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + uncore_pmu_event_stop(event, PERF_EF_UPDATE); + } + + /* reprogram moved events into new counters */ + for (i = 0; i < n; i++) { + event = box->event_list[i]; + hwc = &event->hw; + + if (hwc->idx != assign[i] || + hwc->last_tag != box->tags[assign[i]]) + uncore_assign_hw_event(box, event, assign[i]); + else if (i < box->n_events) + continue; + + if (hwc->state & PERF_HES_ARCH) + continue; + + uncore_pmu_event_start(event, 0); + } + box->n_events = n; + + return 0; +} + +void uncore_pmu_event_del(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int i; + + uncore_pmu_event_stop(event, PERF_EF_UPDATE); + + /* + * The event for free running counter is not tracked by event_list. + * It doesn't need to force event->hw.idx = -1 to reassign the counter. + * Because the event and the free running counter are 1:1 mapped. + */ + if (uncore_pmc_freerunning(event->hw.idx)) + return; + + for (i = 0; i < box->n_events; i++) { + if (event == box->event_list[i]) { + uncore_put_event_constraint(box, event); + + for (++i; i < box->n_events; i++) + box->event_list[i - 1] = box->event_list[i]; + + --box->n_events; + break; + } + } + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; +} + +void uncore_pmu_event_read(struct perf_event *event) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + uncore_perf_event_update(box, event); +} + +/* + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ +static int uncore_validate_group(struct intel_uncore_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct intel_uncore_box *fake_box; + int ret = -EINVAL, n; + + /* The free running counter is always active. */ + if (uncore_pmc_freerunning(event->hw.idx)) + return 0; + + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); + if (!fake_box) + return -ENOMEM; + + fake_box->pmu = pmu; + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + n = uncore_collect_events(fake_box, leader, true); + if (n < 0) + goto out; + + fake_box->n_events = n; + n = uncore_collect_events(fake_box, event, false); + if (n < 0) + goto out; + + fake_box->n_events = n; + + ret = uncore_assign_events(fake_box, NULL, n); +out: + kfree(fake_box); + return ret; +} + +static int uncore_pmu_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + event->cpu = box->cpu; + event->pmu_private = box; + + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + + if (event->attr.config == UNCORE_FIXED_EVENT) { + /* no fixed counter */ + if (!pmu->type->fixed_ctl) + return -EINVAL; + /* + * if there is only one fixed counter, only the first pmu + * can access the fixed counter + */ + if (pmu->type->single_fixed && pmu->pmu_idx > 0) + return -EINVAL; + + /* fixed counters have event field hardcoded to zero */ + hwc->config = 0ULL; + } else if (is_freerunning_event(event)) { + hwc->config = event->attr.config; + if (!check_valid_freerunning_event(box, event)) + return -EINVAL; + event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; + /* + * The free running counter event and free running counter + * are always 1:1 mapped. + * The free running counter is always active. + * Assign the free running counter here. + */ + event->hw.event_base = uncore_freerunning_counter(box, event); + } else { + hwc->config = event->attr.config & + (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); + if (pmu->type->ops->hw_config) { + ret = pmu->type->ops->hw_config(box, event); + if (ret) + return ret; + } + } + + if (event->group_leader != event) + ret = uncore_validate_group(pmu, event); + else + ret = 0; + + return ret; +} + +static void uncore_pmu_enable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->enable_box) + uncore_pmu->type->ops->enable_box(box); +} + +static void uncore_pmu_disable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->disable_box) + uncore_pmu->type->ops->disable_box(box); +} + +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + if (type->num_boxes == 1) + sprintf(pmu_name, "uncore_type_%u", type->type_id); + else { + sprintf(pmu_name, "uncore_type_%u_%d", + type->type_id, type->box_ids[pmu->pmu_idx]); + } +} + +static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + /* + * No uncore block name in discovery table. + * Use uncore_type_&typeid_&boxid as name. + */ + if (!type->name) { + uncore_get_alias_name(pmu->name, pmu); + return; + } + + if (type->num_boxes == 1) { + if (strlen(type->name) > 0) + sprintf(pmu->name, "uncore_%s", type->name); + else + sprintf(pmu->name, "uncore"); + } else { + /* + * Use the box ID from the discovery table if applicable. + */ + sprintf(pmu->name, "uncore_%s_%d", type->name, + type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + } +} + +static int uncore_pmu_register(struct intel_uncore_pmu *pmu) +{ + int ret; + + if (!pmu->type->pmu) { + pmu->pmu = (struct pmu) { + .attr_groups = pmu->type->attr_groups, + .task_ctx_nr = perf_invalid_context, + .pmu_enable = uncore_pmu_enable, + .pmu_disable = uncore_pmu_disable, + .event_init = uncore_pmu_event_init, + .add = uncore_pmu_event_add, + .del = uncore_pmu_event_del, + .start = uncore_pmu_event_start, + .stop = uncore_pmu_event_stop, + .read = uncore_pmu_event_read, + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = pmu->type->attr_update, + }; + } else { + pmu->pmu = *pmu->type->pmu; + pmu->pmu.attr_groups = pmu->type->attr_groups; + pmu->pmu.attr_update = pmu->type->attr_update; + } + + uncore_get_pmu_name(pmu); + + ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); + if (!ret) + pmu->registered = true; + return ret; +} + +static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) +{ + if (!pmu->registered) + return; + perf_pmu_unregister(&pmu->pmu); + pmu->registered = false; +} + +static void uncore_free_boxes(struct intel_uncore_pmu *pmu) +{ + int die; + + for (die = 0; die < uncore_max_dies(); die++) + kfree(pmu->boxes[die]); + kfree(pmu->boxes); +} + +static void uncore_type_exit(struct intel_uncore_type *type) +{ + struct intel_uncore_pmu *pmu = type->pmus; + int i; + + if (type->cleanup_mapping) + type->cleanup_mapping(type); + + if (pmu) { + for (i = 0; i < type->num_boxes; i++, pmu++) { + uncore_pmu_unregister(pmu); + uncore_free_boxes(pmu); + } + kfree(type->pmus); + type->pmus = NULL; + } + if (type->box_ids) { + kfree(type->box_ids); + type->box_ids = NULL; + } + kfree(type->events_group); + type->events_group = NULL; +} + +static void uncore_types_exit(struct intel_uncore_type **types) +{ + for (; *types; types++) + uncore_type_exit(*types); +} + +static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) +{ + struct intel_uncore_pmu *pmus; + size_t size; + int i, j; + + pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); + if (!pmus) + return -ENOMEM; + + size = uncore_max_dies() * sizeof(struct intel_uncore_box *); + + for (i = 0; i < type->num_boxes; i++) { + pmus[i].func_id = setid ? i : -1; + pmus[i].pmu_idx = i; + pmus[i].type = type; + pmus[i].boxes = kzalloc(size, GFP_KERNEL); + if (!pmus[i].boxes) + goto err; + } + + type->pmus = pmus; + type->unconstrainted = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, + 0, type->num_counters, 0, 0); + + if (type->event_descs) { + struct { + struct attribute_group group; + struct attribute *attrs[]; + } *attr_group; + for (i = 0; type->event_descs[i].attr.attr.name; i++); + + attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), + GFP_KERNEL); + if (!attr_group) + goto err; + + attr_group->group.name = "events"; + attr_group->group.attrs = attr_group->attrs; + + for (j = 0; j < i; j++) + attr_group->attrs[j] = &type->event_descs[j].attr.attr; + + type->events_group = &attr_group->group; + } + + type->pmu_group = &uncore_pmu_attr_group; + + if (type->set_mapping) + type->set_mapping(type); + + return 0; + +err: + for (i = 0; i < type->num_boxes; i++) + kfree(pmus[i].boxes); + kfree(pmus); + + return -ENOMEM; +} + +static int __init +uncore_types_init(struct intel_uncore_type **types, bool setid) +{ + int ret; + + for (; *types; types++) { + ret = uncore_type_init(*types, setid); + if (ret) + return ret; + } + return 0; +} + +/* + * Get the die information of a PCI device. + * @pdev: The PCI device. + * @die: The die id which the device maps to. + */ +static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) +{ + *die = uncore_pcibus_to_dieid(pdev->bus); + if (*die < 0) + return -EINVAL; + + return 0; +} + +static struct intel_uncore_pmu * +uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && + pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && + pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) + return &type->pmus[i]; + } + } + } + + return NULL; +} + +/* + * Find the PMU of a PCI device. + * @pdev: The PCI device. + * @ids: The ID table of the available PCI devices with a PMU. + * If NULL, search the whole uncore_pci_uncores. + */ +static struct intel_uncore_pmu * +uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) +{ + struct intel_uncore_pmu *pmu = NULL; + struct intel_uncore_type *type; + kernel_ulong_t data; + unsigned int devfn; + + if (!ids) + return uncore_pci_find_dev_pmu_from_types(pdev); + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + data = ids->driver_data; + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), + UNCORE_PCI_DEV_FUNC(data)); + if (devfn == pdev->devfn) { + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; + break; + } + } + ids++; + } + return pmu; +} + +/* + * Register the PMU for a PCI device + * @pdev: The PCI device. + * @type: The corresponding PMU type of the device. + * @pmu: The corresponding PMU of the device. + * @die: The die id which the device maps to. + */ +static int uncore_pci_pmu_register(struct pci_dev *pdev, + struct intel_uncore_type *type, + struct intel_uncore_pmu *pmu, + int die) +{ + struct intel_uncore_box *box; + int ret; + + if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) + return -EINVAL; + + box = uncore_alloc_box(type, NUMA_NO_NODE); + if (!box) + return -ENOMEM; + + if (pmu->func_id < 0) + pmu->func_id = pdev->devfn; + else + WARN_ON_ONCE(pmu->func_id != pdev->devfn); + + atomic_inc(&box->refcnt); + box->dieid = die; + box->pci_dev = pdev; + box->pmu = pmu; + uncore_box_init(box); + + pmu->boxes[die] = box; + if (atomic_inc_return(&pmu->activeboxes) > 1) + return 0; + + /* First active box registers the pmu */ + ret = uncore_pmu_register(pmu); + if (ret) { + pmu->boxes[die] = NULL; + uncore_box_exit(box); + kfree(box); + } + return ret; +} + +/* + * add a pci uncore device + */ +static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu = NULL; + int die, ret; + + ret = uncore_pci_get_dev_die_info(pdev, &die); + if (ret) + return ret; + + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { + int idx = UNCORE_PCI_DEV_IDX(id->driver_data); + + uncore_extra_pci_dev[die].dev[idx] = pdev; + pci_set_drvdata(pdev, NULL); + return 0; + } + + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + + /* + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. + */ + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); + + pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; + } + + ret = uncore_pci_pmu_register(pdev, type, pmu, die); + + pci_set_drvdata(pdev, pmu->boxes[die]); + + return ret; +} + +/* + * Unregister the PMU of a PCI device + * @pmu: The corresponding PMU is unregistered. + * @die: The die id which the device maps to. + */ +static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) +{ + struct intel_uncore_box *box = pmu->boxes[die]; + + pmu->boxes[die] = NULL; + if (atomic_dec_return(&pmu->activeboxes) == 0) + uncore_pmu_unregister(pmu); + uncore_box_exit(box); + kfree(box); +} + +static void uncore_pci_remove(struct pci_dev *pdev) +{ + struct intel_uncore_box *box; + struct intel_uncore_pmu *pmu; + int i, die; + + if (uncore_pci_get_dev_die_info(pdev, &die)) + return; + + box = pci_get_drvdata(pdev); + if (!box) { + for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { + if (uncore_extra_pci_dev[die].dev[i] == pdev) { + uncore_extra_pci_dev[die].dev[i] = NULL; + break; + } + } + WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); + return; + } + + pmu = box->pmu; + + pci_set_drvdata(pdev, NULL); + + uncore_pci_pmu_unregister(pmu, die); +} + +static int uncore_bus_notify(struct notifier_block *nb, + unsigned long action, void *data, + const struct pci_device_id *ids) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct intel_uncore_pmu *pmu; + int die; + + /* Unregister the PMU when the device is going to be deleted. */ + if (action != BUS_NOTIFY_DEL_DEVICE) + return NOTIFY_DONE; + + pmu = uncore_pci_find_dev_pmu(pdev, ids); + if (!pmu) + return NOTIFY_DONE; + + if (uncore_pci_get_dev_die_info(pdev, &die)) + return NOTIFY_DONE; + + uncore_pci_pmu_unregister(pmu, die); + + return NOTIFY_OK; +} + +static int uncore_pci_sub_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, + uncore_pci_sub_driver->id_table); +} + +static struct notifier_block uncore_pci_sub_notifier = { + .notifier_call = uncore_pci_sub_bus_notify, +}; + +static void uncore_pci_sub_driver_init(void) +{ + const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct pci_dev *pci_sub_dev; + bool notify = false; + unsigned int devfn; + int die; + + while (ids && ids->vendor) { + pci_sub_dev = NULL; + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; + /* + * Search the available device, and register the + * corresponding PMU. + */ + while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, + ids->device, pci_sub_dev))) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn != pci_sub_dev->devfn) + continue; + + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + if (!pmu) + continue; + + if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) + continue; + + if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, + die)) + notify = true; + } + ids++; + } + + if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) + notify = false; + + if (!notify) + uncore_pci_sub_driver = NULL; +} + +static int uncore_pci_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, NULL); +} + +static struct notifier_block uncore_pci_notifier = { + .notifier_call = uncore_pci_bus_notify, +}; + + +static void uncore_pci_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct pci_dev *pdev; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), + UNCORE_DISCOVERY_PCI_BUS(box_ctl), + UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); + if (!pdev) + continue; + pmu = &type->pmus[i]; + + uncore_pci_pmu_register(pdev, type, pmu, die); + } + } + } + + bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); +} + +static int __init uncore_pci_init(void) +{ + size_t size; + int ret; + + size = uncore_max_dies() * sizeof(struct pci_extra_dev); + uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); + if (!uncore_extra_pci_dev) { + ret = -ENOMEM; + goto err; + } + + ret = uncore_types_init(uncore_pci_uncores, false); + if (ret) + goto errtype; + + if (uncore_pci_driver) { + uncore_pci_driver->probe = uncore_pci_probe; + uncore_pci_driver->remove = uncore_pci_remove; + + ret = pci_register_driver(uncore_pci_driver); + if (ret) + goto errtype; + } else + uncore_pci_pmus_register(); + + if (uncore_pci_sub_driver) + uncore_pci_sub_driver_init(); + + pcidrv_registered = true; + return 0; + +errtype: + uncore_types_exit(uncore_pci_uncores); + kfree(uncore_extra_pci_dev); + uncore_extra_pci_dev = NULL; + uncore_free_pcibus_map(); +err: + uncore_pci_uncores = empty_uncore; + return ret; +} + +static void uncore_pci_exit(void) +{ + if (pcidrv_registered) { + pcidrv_registered = false; + if (uncore_pci_sub_driver) + bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); + if (uncore_pci_driver) + pci_unregister_driver(uncore_pci_driver); + else + bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); + uncore_types_exit(uncore_pci_uncores); + kfree(uncore_extra_pci_dev); + uncore_free_pcibus_map(); + } +} + +static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, + int new_cpu) +{ + struct intel_uncore_pmu *pmu = type->pmus; + struct intel_uncore_box *box; + int i, die; + + die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[die]; + if (!box) + continue; + + if (old_cpu < 0) { + WARN_ON_ONCE(box->cpu != -1); + box->cpu = new_cpu; + continue; + } + + WARN_ON_ONCE(box->cpu != old_cpu); + box->cpu = -1; + if (new_cpu < 0) + continue; + + uncore_pmu_cancel_hrtimer(box); + perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); + box->cpu = new_cpu; + } +} + +static void uncore_change_context(struct intel_uncore_type **uncores, + int old_cpu, int new_cpu) +{ + for (; *uncores; uncores++) + uncore_change_type_ctx(*uncores, old_cpu, new_cpu); +} + +static void uncore_box_unref(struct intel_uncore_type **types, int id) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[id]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } +} + +static int uncore_event_cpu_offline(unsigned int cpu) +{ + int die, target; + + /* Check if exiting cpu is used for collecting uncore events */ + if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) + goto unref; + /* Find a new cpu to collect uncore events */ + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); + + /* Migrate uncore events to the new target */ + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &uncore_cpu_mask); + else + target = -1; + + uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_mmio_uncores, cpu, target); + uncore_change_context(uncore_pci_uncores, cpu, target); + +unref: + /* Clear the references */ + die = topology_logical_die_id(cpu); + uncore_box_unref(uncore_msr_uncores, die); + uncore_box_unref(uncore_mmio_uncores, die); + return 0; +} + +static int allocate_boxes(struct intel_uncore_type **types, + unsigned int die, unsigned int cpu) +{ + struct intel_uncore_box *box, *tmp; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + LIST_HEAD(allocated); + int i; + + /* Try to allocate all required boxes */ + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[die]) + continue; + box = uncore_alloc_box(type, cpu_to_node(cpu)); + if (!box) + goto cleanup; + box->pmu = pmu; + box->dieid = die; + list_add(&box->active_list, &allocated); + } + } + /* Install them in the pmus */ + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + box->pmu->boxes[die] = box; + } + return 0; + +cleanup: + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + kfree(box); + } + return -ENOMEM; +} + +static int uncore_box_ref(struct intel_uncore_type **types, + int id, unsigned int cpu) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, ret; + + ret = allocate_boxes(types, id, cpu); + if (ret) + return ret; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[id]; + if (box && atomic_inc_return(&box->refcnt) == 1) + uncore_box_init(box); + } + } + return 0; +} + +static int uncore_event_cpu_online(unsigned int cpu) +{ + int die, target, msr_ret, mmio_ret; + + die = topology_logical_die_id(cpu); + msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); + mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); + if (msr_ret && mmio_ret) + return -ENOMEM; + + /* + * Check if there is an online cpu in the package + * which collects uncore events already. + */ + target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); + if (target < nr_cpu_ids) + return 0; + + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + if (!msr_ret) + uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!mmio_ret) + uncore_change_context(uncore_mmio_uncores, -1, cpu); + uncore_change_context(uncore_pci_uncores, -1, cpu); + return 0; +} + +static int __init type_pmu_register(struct intel_uncore_type *type) +{ + int i, ret; + + for (i = 0; i < type->num_boxes; i++) { + ret = uncore_pmu_register(&type->pmus[i]); + if (ret) + return ret; + } + return 0; +} + +static int __init uncore_msr_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_msr_uncores; + int ret; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + return ret; + } + return 0; +} + +static int __init uncore_cpu_init(void) +{ + int ret; + + ret = uncore_types_init(uncore_msr_uncores, true); + if (ret) + goto err; + + ret = uncore_msr_pmus_register(); + if (ret) + goto err; + return 0; +err: + uncore_types_exit(uncore_msr_uncores); + uncore_msr_uncores = empty_uncore; + return ret; +} + +static int __init uncore_mmio_init(void) +{ + struct intel_uncore_type **types = uncore_mmio_uncores; + int ret; + + ret = uncore_types_init(types, true); + if (ret) + goto err; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + goto err; + } + return 0; +err: + uncore_types_exit(uncore_mmio_uncores); + uncore_mmio_uncores = empty_uncore; + return ret; +} + +struct intel_uncore_init_fun { + void (*cpu_init)(void); + int (*pci_init)(void); + void (*mmio_init)(void); + bool use_discovery; +}; + +static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { + .cpu_init = nhm_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun snb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = snb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = ivb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = hsw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = bdw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { + .cpu_init = snbep_uncore_cpu_init, + .pci_init = snbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { + .cpu_init = nhmex_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { + .cpu_init = ivbep_uncore_cpu_init, + .pci_init = ivbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { + .cpu_init = hswep_uncore_cpu_init, + .pci_init = hswep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { + .cpu_init = bdx_uncore_cpu_init, + .pci_init = bdx_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun knl_uncore_init __initconst = { + .cpu_init = knl_uncore_cpu_init, + .pci_init = knl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun skx_uncore_init __initconst = { + .cpu_init = skx_uncore_cpu_init, + .pci_init = skx_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun icl_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .mmio_init = tgl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .mmio_init = tgl_l_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun adl_uncore_init __initconst = { + .cpu_init = adl_uncore_cpu_init, + .mmio_init = adl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { + .cpu_init = mtl_uncore_cpu_init, + .mmio_init = adl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun icx_uncore_init __initconst = { + .cpu_init = icx_uncore_cpu_init, + .pci_init = icx_uncore_pci_init, + .mmio_init = icx_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun snr_uncore_init __initconst = { + .cpu_init = snr_uncore_cpu_init, + .pci_init = snr_uncore_pci_init, + .mmio_init = snr_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun spr_uncore_init __initconst = { + .cpu_init = spr_uncore_cpu_init, + .pci_init = spr_uncore_pci_init, + .mmio_init = spr_uncore_mmio_init, + .use_discovery = true, +}; + +static const struct intel_uncore_init_fun generic_uncore_init __initconst = { + .cpu_init = intel_uncore_generic_uncore_cpu_init, + .pci_init = intel_uncore_generic_uncore_pci_init, + .mmio_init = intel_uncore_generic_uncore_mmio_init, +}; + +static const struct x86_cpu_id intel_uncore_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), + {}, +}; +MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); + +static int __init intel_uncore_init(void) +{ + const struct x86_cpu_id *id; + struct intel_uncore_init_fun *uncore_init; + int pret = 0, cret = 0, mret = 0, ret; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + __uncore_max_dies = + topology_max_packages() * topology_max_die_per_package(); + + id = x86_match_cpu(intel_uncore_match); + if (!id) { + if (!uncore_no_discover && intel_uncore_has_discovery_tables()) + uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; + else + return -ENODEV; + } else { + uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_no_discover && uncore_init->use_discovery) + return -ENODEV; + if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + return -ENODEV; + } + + if (uncore_init->pci_init) { + pret = uncore_init->pci_init(); + if (!pret) + pret = uncore_pci_init(); + } + + if (uncore_init->cpu_init) { + uncore_init->cpu_init(); + cret = uncore_cpu_init(); + } + + if (uncore_init->mmio_init) { + uncore_init->mmio_init(); + mret = uncore_mmio_init(); + } + + if (cret && pret && mret) { + ret = -ENODEV; + goto free_discovery; + } + + /* Install hotplug callbacks to setup the targets for each package */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "perf/x86/intel/uncore:online", + uncore_event_cpu_online, + uncore_event_cpu_offline); + if (ret) + goto err; + return 0; + +err: + uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); + uncore_pci_exit(); +free_discovery: + intel_uncore_clear_discovery_tables(); + return ret; +} +module_init(intel_uncore_init); + +static void __exit intel_uncore_exit(void) +{ + cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); + uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); + uncore_pci_exit(); + intel_uncore_clear_discovery_tables(); +} +module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h new file mode 100644 index 000000000..b74e35291 --- /dev/null +++ b/arch/x86/events/intel/uncore.h @@ -0,0 +1,620 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include + +#include +#include "../perf_event.h" + +#define UNCORE_PMU_NAME_LEN 32 +#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) +#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC) + +#define UNCORE_FIXED_EVENT 0xff +#define UNCORE_PMC_IDX_MAX_GENERIC 8 +#define UNCORE_PMC_IDX_MAX_FIXED 1 +#define UNCORE_PMC_IDX_MAX_FREERUNNING 1 +#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC +#define UNCORE_PMC_IDX_FREERUNNING (UNCORE_PMC_IDX_FIXED + \ + UNCORE_PMC_IDX_MAX_FIXED) +#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FREERUNNING + \ + UNCORE_PMC_IDX_MAX_FREERUNNING) + +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) +#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) +#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) +#define UNCORE_PCI_DEV_IDX(data) (data & 0xff) +#define UNCORE_EXTRA_PCI_DEV 0xff +#define UNCORE_EXTRA_PCI_DEV_MAX 4 + +#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) + +struct pci_extra_dev { + struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; +}; + +struct intel_uncore_ops; +struct intel_uncore_pmu; +struct intel_uncore_box; +struct uncore_event_desc; +struct freerunning_counters; +struct intel_uncore_topology; + +struct intel_uncore_type { + const char *name; + int num_counters; + int num_boxes; + int perf_ctr_bits; + int fixed_ctr_bits; + int num_freerunning_types; + int type_id; + unsigned perf_ctr; + unsigned event_ctl; + unsigned event_mask; + unsigned event_mask_ext; + unsigned fixed_ctr; + unsigned fixed_ctl; + unsigned box_ctl; + u64 *box_ctls; /* Unit ctrl addr of the first box of each die */ + union { + unsigned msr_offset; + unsigned mmio_offset; + }; + unsigned mmio_map_size; + unsigned num_shared_regs:8; + unsigned single_fixed:1; + unsigned pair_ctr_ctl:1; + union { + unsigned *msr_offsets; + unsigned *pci_offsets; + unsigned *mmio_offsets; + }; + unsigned *box_ids; + struct event_constraint unconstrainted; + struct event_constraint *constraints; + struct intel_uncore_pmu *pmus; + struct intel_uncore_ops *ops; + struct uncore_event_desc *event_descs; + struct freerunning_counters *freerunning; + const struct attribute_group *attr_groups[4]; + const struct attribute_group **attr_update; + struct pmu *pmu; /* for custom pmu ops */ + /* + * Uncore PMU would store relevant platform topology configuration here + * to identify which platform component each PMON block of that type is + * supposed to monitor. + */ + struct intel_uncore_topology *topology; + /* + * Optional callbacks for managing mapping of Uncore units to PMONs + */ + int (*get_topology)(struct intel_uncore_type *type); + int (*set_mapping)(struct intel_uncore_type *type); + void (*cleanup_mapping)(struct intel_uncore_type *type); +}; + +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] + +struct intel_uncore_ops { + void (*init_box)(struct intel_uncore_box *); + void (*exit_box)(struct intel_uncore_box *); + void (*disable_box)(struct intel_uncore_box *); + void (*enable_box)(struct intel_uncore_box *); + void (*disable_event)(struct intel_uncore_box *, struct perf_event *); + void (*enable_event)(struct intel_uncore_box *, struct perf_event *); + u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); + int (*hw_config)(struct intel_uncore_box *, struct perf_event *); + struct event_constraint *(*get_constraint)(struct intel_uncore_box *, + struct perf_event *); + void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); +}; + +struct intel_uncore_pmu { + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + bool registered; + atomic_t activeboxes; + struct intel_uncore_type *type; + struct intel_uncore_box **boxes; +}; + +struct intel_uncore_extra_reg { + raw_spinlock_t lock; + u64 config, config1, config2; + atomic_t ref; +}; + +struct intel_uncore_box { + int dieid; /* Logical die ID */ + int n_active; /* number of active events */ + int n_events; + int cpu; /* cpu to collect events */ + unsigned long flags; + atomic_t refcnt; + struct perf_event *events[UNCORE_PMC_IDX_MAX]; + struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; + struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX]; + unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; + u64 tags[UNCORE_PMC_IDX_MAX]; + struct pci_dev *pci_dev; + struct intel_uncore_pmu *pmu; + u64 hrtimer_duration; /* hrtimer timeout for this box */ + struct hrtimer hrtimer; + struct list_head list; + struct list_head active_list; + void __iomem *io_addr; + struct intel_uncore_extra_reg shared_regs[]; +}; + +/* CFL uncore 8th cbox MSRs */ +#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70 +#define CFL_UNC_CBO_7_PER_CTR0 0xf76 + +#define UNCORE_BOX_FLAG_INITIATED 0 +/* event config registers are 8-byte apart */ +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 +/* CFL 8th CBOX has different MSR space */ +#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 + +struct uncore_event_desc { + struct device_attribute attr; + const char *config; +}; + +struct freerunning_counters { + unsigned int counter_base; + unsigned int counter_offset; + unsigned int box_offset; + unsigned int num_counters; + unsigned int bits; + unsigned *box_offsets; +}; + +struct intel_uncore_topology { + u64 configuration; + int segment; +}; + +struct pci2phy_map { + struct list_head list; + int segment; + int pbus_to_dieid[256]; +}; + +struct pci2phy_map *__find_pci2phy_map(int segment); +int uncore_pcibus_to_dieid(struct pci_bus *bus); +int uncore_die_to_segment(int die); + +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); + +static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev) +{ + return container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu); +} + +#define to_device_attribute(n) container_of(n, struct device_attribute, attr) +#define to_dev_ext_attribute(n) container_of(n, struct dev_ext_attribute, attr) +#define attr_to_ext_attr(n) to_dev_ext_attribute(to_device_attribute(n)) + +extern int __uncore_max_dies; +#define uncore_max_dies() (__uncore_max_dies) + +#define INTEL_UNCORE_EVENT_DESC(_name, _config) \ +{ \ + .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ + .config = _config, \ +} + +#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __uncore_##_var##_show, NULL) + +static inline bool uncore_pmc_fixed(int idx) +{ + return idx == UNCORE_PMC_IDX_FIXED; +} + +static inline bool uncore_pmc_freerunning(int idx) +{ + return idx == UNCORE_PMC_IDX_FREERUNNING; +} + +static inline bool uncore_mmio_is_valid_offset(struct intel_uncore_box *box, + unsigned long offset) +{ + if (offset < box->pmu->type->mmio_map_size) + return true; + + pr_warn_once("perf uncore: Invalid offset 0x%lx exceeds mapped area of %s.\n", + offset, box->pmu->type->name); + + return false; +} + +static inline +unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * box->pmu->pmu_idx; +} + +static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl; +} + +static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctl; +} + +static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr; +} + +static inline +unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) +{ + if (test_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags)) + return idx * 8 + box->pmu->type->event_ctl; + + return idx * 4 + box->pmu->type->event_ctl; +} + +static inline +unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) +{ + return idx * 8 + box->pmu->type->perf_ctr; +} + +static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box) +{ + struct intel_uncore_pmu *pmu = box->pmu; + return pmu->type->msr_offsets ? + pmu->type->msr_offsets[pmu->pmu_idx] : + pmu->type->msr_offset * pmu->pmu_idx; +} + +static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) +{ + if (!box->pmu->type->box_ctl) + return 0; + return box->pmu->type->box_ctl + uncore_msr_box_offset(box); +} + +static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) +{ + if (!box->pmu->type->fixed_ctl) + return 0; + return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box); +} + +static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box); +} + + +/* + * In the uncore document, there is no event-code assigned to free running + * counters. Some events need to be defined to indicate the free running + * counters. The events are encoded as event-code + umask-code. + * + * The event-code for all free running counters is 0xff, which is the same as + * the fixed counters. + * + * The umask-code is used to distinguish a fixed counter and a free running + * counter, and different types of free running counters. + * - For fixed counters, the umask-code is 0x0X. + * X indicates the index of the fixed counter, which starts from 0. + * - For free running counters, the umask-code uses the rest of the space. + * It would bare the format of 0xXY. + * X stands for the type of free running counters, which starts from 1. + * Y stands for the index of free running counters of same type, which + * starts from 0. + * + * For example, there are three types of IIO free running counters on Skylake + * server, IO CLOCKS counters, BANDWIDTH counters and UTILIZATION counters. + * The event-code for all the free running counters is 0xff. + * 'ioclk' is the first counter of IO CLOCKS. IO CLOCKS is the first type, + * which umask-code starts from 0x10. + * So 'ioclk' is encoded as event=0xff,umask=0x10 + * 'bw_in_port2' is the third counter of BANDWIDTH counters. BANDWIDTH is + * the second type, which umask-code starts from 0x20. + * So 'bw_in_port2' is encoded as event=0xff,umask=0x22 + */ +static inline unsigned int uncore_freerunning_idx(u64 config) +{ + return ((config >> 8) & 0xf); +} + +#define UNCORE_FREERUNNING_UMASK_START 0x10 + +static inline unsigned int uncore_freerunning_type(u64 config) +{ + return ((((config >> 8) - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf); +} + +static inline +unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->hw.config); + unsigned int idx = uncore_freerunning_idx(event->hw.config); + struct intel_uncore_pmu *pmu = box->pmu; + + return pmu->type->freerunning[type].counter_base + + pmu->type->freerunning[type].counter_offset * idx + + (pmu->type->freerunning[type].box_offsets ? + pmu->type->freerunning[type].box_offsets[pmu->pmu_idx] : + pmu->type->freerunning[type].box_offset * pmu->pmu_idx); +} + +static inline +unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) +{ + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PERFEVTSEL0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } +} + +static inline +unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) +{ + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PER_CTR0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } +} + +static inline +unsigned uncore_fixed_ctl(struct intel_uncore_box *box) +{ + if (box->pci_dev || box->io_addr) + return uncore_pci_fixed_ctl(box); + else + return uncore_msr_fixed_ctl(box); +} + +static inline +unsigned uncore_fixed_ctr(struct intel_uncore_box *box) +{ + if (box->pci_dev || box->io_addr) + return uncore_pci_fixed_ctr(box); + else + return uncore_msr_fixed_ctr(box); +} + +static inline +unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) +{ + if (box->pci_dev || box->io_addr) + return uncore_pci_event_ctl(box, idx); + else + return uncore_msr_event_ctl(box, idx); +} + +static inline +unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) +{ + if (box->pci_dev || box->io_addr) + return uncore_pci_perf_ctr(box, idx); + else + return uncore_msr_perf_ctr(box, idx); +} + +static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) +{ + return box->pmu->type->perf_ctr_bits; +} + +static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr_bits; +} + +static inline +unsigned int uncore_freerunning_bits(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->hw.config); + + return box->pmu->type->freerunning[type].bits; +} + +static inline int uncore_num_freerunning(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->hw.config); + + return box->pmu->type->freerunning[type].num_counters; +} + +static inline int uncore_num_freerunning_types(struct intel_uncore_box *box, + struct perf_event *event) +{ + return box->pmu->type->num_freerunning_types; +} + +static inline bool check_valid_freerunning_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->hw.config); + unsigned int idx = uncore_freerunning_idx(event->hw.config); + + return (type < uncore_num_freerunning_types(box, event)) && + (idx < uncore_num_freerunning(box, event)); +} + +static inline int uncore_num_counters(struct intel_uncore_box *box) +{ + return box->pmu->type->num_counters; +} + +static inline bool is_freerunning_event(struct perf_event *event) +{ + u64 cfg = event->attr.config; + + return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) && + (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START); +} + +/* Check and reject invalid config */ +static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (is_freerunning_event(event)) + return 0; + + return -EINVAL; +} + +static inline void uncore_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + box->pmu->type->ops->disable_event(box, event); +} + +static inline void uncore_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + box->pmu->type->ops->enable_event(box, event); +} + +static inline u64 uncore_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + return box->pmu->type->ops->read_counter(box, event); +} + +static inline void uncore_box_init(struct intel_uncore_box *box) +{ + if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->init_box) + box->pmu->type->ops->init_box(box); + } +} + +static inline void uncore_box_exit(struct intel_uncore_box *box) +{ + if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->exit_box) + box->pmu->type->ops->exit_box(box); + } +} + +static inline bool uncore_box_is_fake(struct intel_uncore_box *box) +{ + return (box->dieid < 0); +} + +static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + return event->pmu_private; +} + +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_mmio_exit_box(struct intel_uncore_box *box); +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event); +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_event_start(struct perf_event *event, int flags); +void uncore_pmu_event_stop(struct perf_event *event, int flags); +int uncore_pmu_event_add(struct perf_event *event, int flags); +void uncore_pmu_event_del(struct perf_event *event, int flags); +void uncore_pmu_event_read(struct perf_event *event); +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); +struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu); + +extern struct intel_uncore_type *empty_uncore[]; +extern struct intel_uncore_type **uncore_msr_uncores; +extern struct intel_uncore_type **uncore_pci_uncores; +extern struct intel_uncore_type **uncore_mmio_uncores; +extern struct pci_driver *uncore_pci_driver; +extern struct pci_driver *uncore_pci_sub_driver; +extern raw_spinlock_t pci2phy_map_lock; +extern struct list_head pci2phy_map_head; +extern struct pci_extra_dev *uncore_extra_pci_dev; +extern struct event_constraint uncore_constraint_empty; + +/* uncore_snb.c */ +int snb_uncore_pci_init(void); +int ivb_uncore_pci_init(void); +int hsw_uncore_pci_init(void); +int bdw_uncore_pci_init(void); +int skl_uncore_pci_init(void); +void snb_uncore_cpu_init(void); +void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); +void icl_uncore_cpu_init(void); +void tgl_uncore_cpu_init(void); +void adl_uncore_cpu_init(void); +void mtl_uncore_cpu_init(void); +void tgl_uncore_mmio_init(void); +void tgl_l_uncore_mmio_init(void); +void adl_uncore_mmio_init(void); +int snb_pci2phy_map_init(int devid); + +/* uncore_snbep.c */ +int snbep_uncore_pci_init(void); +void snbep_uncore_cpu_init(void); +int ivbep_uncore_pci_init(void); +void ivbep_uncore_cpu_init(void); +int hswep_uncore_pci_init(void); +void hswep_uncore_cpu_init(void); +int bdx_uncore_pci_init(void); +void bdx_uncore_cpu_init(void); +int knl_uncore_pci_init(void); +void knl_uncore_cpu_init(void); +int skx_uncore_pci_init(void); +void skx_uncore_cpu_init(void); +int snr_uncore_pci_init(void); +void snr_uncore_cpu_init(void); +void snr_uncore_mmio_init(void); +int icx_uncore_pci_init(void); +void icx_uncore_cpu_init(void); +void icx_uncore_mmio_init(void); +int spr_uncore_pci_init(void); +void spr_uncore_cpu_init(void); +void spr_uncore_mmio_init(void); + +/* uncore_nhmex.c */ +void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c new file mode 100644 index 000000000..7d4541414 --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.c @@ -0,0 +1,642 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support Intel uncore PerfMon discovery mechanism. + * Copyright(c) 2021 Intel Corporation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "uncore.h" +#include "uncore_discovery.h" + +static struct rb_root discovery_tables = RB_ROOT; +static int num_discovered_types[UNCORE_ACCESS_MAX]; + +static bool has_generic_discovery_table(void) +{ + struct pci_dev *dev; + int dvsec; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, UNCORE_DISCOVERY_TABLE_DEVICE, NULL); + if (!dev) + return false; + + /* A discovery table device has the unique capability ID. */ + dvsec = pci_find_next_ext_capability(dev, 0, UNCORE_EXT_CAP_ID_DISCOVERY); + pci_dev_put(dev); + if (dvsec) + return true; + + return false; +} + +static int logical_die_id; + +static int get_device_die_id(struct pci_dev *dev) +{ + int cpu, node = pcibus_to_node(dev->bus); + + /* + * If the NUMA info is not available, assume that the logical die id is + * continuous in the order in which the discovery table devices are + * detected. + */ + if (node < 0) + return logical_die_id++; + + for_each_cpu(cpu, cpumask_of_node(node)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) + return c->logical_die_id; + } + + /* + * All CPUs of a node may be offlined. For this case, + * the PCI and MMIO type of uncore blocks which are + * enumerated by the device will be unavailable. + */ + return -1; +} + +#define __node_2_type(cur) \ + rb_entry((cur), struct intel_uncore_discovery_type, node) + +static inline int __type_cmp(const void *key, const struct rb_node *b) +{ + struct intel_uncore_discovery_type *type_b = __node_2_type(b); + const u16 *type_id = key; + + if (type_b->type > *type_id) + return -1; + else if (type_b->type < *type_id) + return 1; + + return 0; +} + +static inline struct intel_uncore_discovery_type * +search_uncore_discovery_type(u16 type_id) +{ + struct rb_node *node = rb_find(&type_id, &discovery_tables, __type_cmp); + + return (node) ? __node_2_type(node) : NULL; +} + +static inline bool __type_less(struct rb_node *a, const struct rb_node *b) +{ + return (__node_2_type(a)->type < __node_2_type(b)->type); +} + +static struct intel_uncore_discovery_type * +add_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + if (unit->access_type >= UNCORE_ACCESS_MAX) { + pr_warn("Unsupported access type %d\n", unit->access_type); + return NULL; + } + + type = kzalloc(sizeof(struct intel_uncore_discovery_type), GFP_KERNEL); + if (!type) + return NULL; + + type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); + if (!type->box_ctrl_die) + goto free_type; + + type->access_type = unit->access_type; + num_discovered_types[type->access_type]++; + type->type = unit->box_type; + + rb_add(&type->node, &discovery_tables, __type_less); + + return type; + +free_type: + kfree(type); + + return NULL; + +} + +static struct intel_uncore_discovery_type * +get_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + type = search_uncore_discovery_type(unit->box_type); + if (type) + return type; + + return add_uncore_discovery_type(unit); +} + +static void +uncore_insert_box_info(struct uncore_unit_discovery *unit, + int die, bool parsed) +{ + struct intel_uncore_discovery_type *type; + unsigned int *box_offset, *ids; + int i; + + if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { + pr_info("Invalid address is detected for uncore type %d box %d, " + "Disable the uncore unit.\n", + unit->box_type, unit->box_id); + return; + } + + if (parsed) { + type = search_uncore_discovery_type(unit->box_type); + if (!type) { + pr_info("A spurious uncore type %d is detected, " + "Disable the uncore type.\n", + unit->box_type); + return; + } + /* Store the first box of each die */ + if (!type->box_ctrl_die[die]) + type->box_ctrl_die[die] = unit->ctl; + return; + } + + type = get_uncore_discovery_type(unit); + if (!type) + return; + + box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!box_offset) + return; + + ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!ids) + goto free_box_offset; + + /* Store generic information for the first box */ + if (!type->num_boxes) { + type->box_ctrl = unit->ctl; + type->box_ctrl_die[die] = unit->ctl; + type->num_counters = unit->num_regs; + type->counter_width = unit->bit_width; + type->ctl_offset = unit->ctl_offset; + type->ctr_offset = unit->ctr_offset; + *ids = unit->box_id; + goto end; + } + + for (i = 0; i < type->num_boxes; i++) { + ids[i] = type->ids[i]; + box_offset[i] = type->box_offset[i]; + + if (unit->box_id == ids[i]) { + pr_info("Duplicate uncore type %d box ID %d is detected, " + "Drop the duplicate uncore unit.\n", + unit->box_type, unit->box_id); + goto free_ids; + } + } + ids[i] = unit->box_id; + box_offset[i] = unit->ctl - type->box_ctrl; + kfree(type->ids); + kfree(type->box_offset); +end: + type->ids = ids; + type->box_offset = box_offset; + type->num_boxes++; + return; + +free_ids: + kfree(ids); + +free_box_offset: + kfree(box_offset); + +} + +static int parse_discovery_table(struct pci_dev *dev, int die, + u32 bar_offset, bool *parsed) +{ + struct uncore_global_discovery global; + struct uncore_unit_discovery unit; + void __iomem *io_addr; + resource_size_t addr; + unsigned long size; + u32 val; + int i; + + pci_read_config_dword(dev, bar_offset, &val); + + if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64) + return -EINVAL; + + addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + u32 val2; + + pci_read_config_dword(dev, bar_offset + 4, &val2); + addr |= ((resource_size_t)val2) << 32; + } +#endif + size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Read Global Discovery State */ + memcpy_fromio(&global, io_addr, sizeof(struct uncore_global_discovery)); + if (uncore_discovery_invalid_unit(global)) { + pr_info("Invalid Global Discovery State: 0x%llx 0x%llx 0x%llx\n", + global.table1, global.ctl, global.table3); + iounmap(io_addr); + return -EINVAL; + } + iounmap(io_addr); + + size = (1 + global.max_units) * global.stride * 8; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Parsing Unit Discovery State */ + for (i = 0; i < global.max_units; i++) { + memcpy_fromio(&unit, io_addr + (i + 1) * (global.stride * 8), + sizeof(struct uncore_unit_discovery)); + + if (uncore_discovery_invalid_unit(unit)) + continue; + + if (unit.access_type >= UNCORE_ACCESS_MAX) + continue; + + uncore_insert_box_info(&unit, die, *parsed); + } + + *parsed = true; + iounmap(io_addr); + return 0; +} + +bool intel_uncore_has_discovery_tables(void) +{ + u32 device, val, entry_id, bar_offset; + int die, dvsec = 0, ret = true; + struct pci_dev *dev = NULL; + bool parsed = false; + + if (has_generic_discovery_table()) + device = UNCORE_DISCOVERY_TABLE_DEVICE; + else + device = PCI_ANY_ID; + + /* + * Start a new search and iterates through the list of + * the discovery table devices. + */ + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { + while ((dvsec = pci_find_next_ext_capability(dev, dvsec, UNCORE_EXT_CAP_ID_DISCOVERY))) { + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC_OFFSET, &val); + entry_id = val & UNCORE_DISCOVERY_DVSEC_ID_MASK; + if (entry_id != UNCORE_DISCOVERY_DVSEC_ID_PMON) + continue; + + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC2_OFFSET, &val); + + if (val & ~UNCORE_DISCOVERY_DVSEC2_BIR_MASK) { + ret = false; + goto err; + } + bar_offset = UNCORE_DISCOVERY_BIR_BASE + + (val & UNCORE_DISCOVERY_DVSEC2_BIR_MASK) * UNCORE_DISCOVERY_BIR_STEP; + + die = get_device_die_id(dev); + if (die < 0) + continue; + + parse_discovery_table(dev, die, bar_offset, &parsed); + } + } + + /* None of the discovery tables are available */ + if (!parsed) + ret = false; +err: + pci_dev_put(dev); + + return ret; +} + +void intel_uncore_clear_discovery_tables(void) +{ + struct intel_uncore_discovery_type *type, *next; + + rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) { + kfree(type->box_ctrl_die); + kfree(type); + } +} + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh, thresh, "config:24-31"); + +static struct attribute *generic_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh.attr, + NULL, +}; + +static const struct attribute_group generic_uncore_format_group = { + .name = "format", + .attrs = generic_uncore_formats_attr, +}; + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), 0); +} + +static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, 0); +} + +static struct intel_uncore_ops generic_uncore_msr_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = intel_generic_uncore_msr_disable_event, + .enable_event = intel_generic_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, 0); +} + +static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, 0); +} + +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops generic_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = intel_generic_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define UNCORE_GENERIC_MMIO_SIZE 0x4000 + +static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + struct intel_uncore_type *type = box->pmu->type; + + if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets) + return 0; + + return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; +} + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + u64 box_ctl = generic_uncore_mmio_box_ctl(box); + struct intel_uncore_type *type = box->pmu->type; + resource_size_t addr; + + if (!box_ctl) { + pr_warn("Uncore type %d box %d: Invalid box control address.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx]); + return; + } + + addr = box_ctl; + box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); + if (!box->io_addr) { + pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx], + (unsigned long long)addr); + return; + } + + writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr); +} + +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr); +} + +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr); +} + +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(0, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops generic_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static bool uncore_update_uncore_type(enum uncore_access_type type_id, + struct intel_uncore_type *uncore, + struct intel_uncore_discovery_type *type) +{ + uncore->type_id = type->type; + uncore->num_boxes = type->num_boxes; + uncore->num_counters = type->num_counters; + uncore->perf_ctr_bits = type->counter_width; + uncore->box_ids = type->ids; + + switch (type_id) { + case UNCORE_ACCESS_MSR: + uncore->ops = &generic_uncore_msr_ops; + uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset; + uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->msr_offsets = type->box_offset; + break; + case UNCORE_ACCESS_PCI: + uncore->ops = &generic_uncore_pci_ops; + uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset; + uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset; + uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); + uncore->box_ctls = type->box_ctrl_die; + uncore->pci_offsets = type->box_offset; + break; + case UNCORE_ACCESS_MMIO: + uncore->ops = &generic_uncore_mmio_ops; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->box_ctls = type->box_ctrl_die; + uncore->mmio_offsets = type->box_offset; + uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; + break; + default: + return false; + } + + return true; +} + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra) +{ + struct intel_uncore_discovery_type *type; + struct intel_uncore_type **uncores; + struct intel_uncore_type *uncore; + struct rb_node *node; + int i = 0; + + uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1, + sizeof(struct intel_uncore_type *), GFP_KERNEL); + if (!uncores) + return empty_uncore; + + for (node = rb_first(&discovery_tables); node; node = rb_next(node)) { + type = rb_entry(node, struct intel_uncore_discovery_type, node); + if (type->access_type != type_id) + continue; + + uncore = kzalloc(sizeof(struct intel_uncore_type), GFP_KERNEL); + if (!uncore) + break; + + uncore->event_mask = GENERIC_PMON_RAW_EVENT_MASK; + uncore->format_group = &generic_uncore_format_group; + + if (!uncore_update_uncore_type(type_id, uncore, type)) { + kfree(uncore); + continue; + } + uncores[i++] = uncore; + } + + return uncores; +} + +void intel_uncore_generic_uncore_cpu_init(void) +{ + uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0); +} + +int intel_uncore_generic_uncore_pci_init(void) +{ + uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0); + + return 0; +} + +void intel_uncore_generic_uncore_mmio_init(void) +{ + uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0); +} diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h new file mode 100644 index 000000000..f44393577 --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Generic device ID of a discovery table device */ +#define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7 +/* Capability ID for a discovery table device */ +#define UNCORE_EXT_CAP_ID_DISCOVERY 0x23 +/* First DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC_OFFSET 0x8 +/* Mask of the supported discovery entry type */ +#define UNCORE_DISCOVERY_DVSEC_ID_MASK 0xffff +/* PMON discovery entry type ID */ +#define UNCORE_DISCOVERY_DVSEC_ID_PMON 0x1 +/* Second DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC2_OFFSET 0xc +/* Mask of the discovery table BAR offset */ +#define UNCORE_DISCOVERY_DVSEC2_BIR_MASK 0x7 +/* Discovery table BAR base offset */ +#define UNCORE_DISCOVERY_BIR_BASE 0x10 +/* Discovery table BAR step */ +#define UNCORE_DISCOVERY_BIR_STEP 0x4 +/* Global discovery table size */ +#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 + +#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7) +#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff) +#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff) +#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff) + + +#define uncore_discovery_invalid_unit(unit) \ + (!unit.table1 || !unit.ctl || \ + unit.table1 == -1ULL || unit.ctl == -1ULL || \ + unit.table3 == -1ULL) + +#define GENERIC_PMON_CTL_EV_SEL_MASK 0x000000ff +#define GENERIC_PMON_CTL_UMASK_MASK 0x0000ff00 +#define GENERIC_PMON_CTL_EDGE_DET (1 << 18) +#define GENERIC_PMON_CTL_INVERT (1 << 23) +#define GENERIC_PMON_CTL_TRESH_MASK 0xff000000 +#define GENERIC_PMON_RAW_EVENT_MASK (GENERIC_PMON_CTL_EV_SEL_MASK | \ + GENERIC_PMON_CTL_UMASK_MASK | \ + GENERIC_PMON_CTL_EDGE_DET | \ + GENERIC_PMON_CTL_INVERT | \ + GENERIC_PMON_CTL_TRESH_MASK) + +#define GENERIC_PMON_BOX_CTL_FRZ (1 << 0) +#define GENERIC_PMON_BOX_CTL_RST_CTRL (1 << 8) +#define GENERIC_PMON_BOX_CTL_RST_CTRS (1 << 9) +#define GENERIC_PMON_BOX_CTL_INT (GENERIC_PMON_BOX_CTL_RST_CTRL | \ + GENERIC_PMON_BOX_CTL_RST_CTRS) + +enum uncore_access_type { + UNCORE_ACCESS_MSR = 0, + UNCORE_ACCESS_MMIO, + UNCORE_ACCESS_PCI, + + UNCORE_ACCESS_MAX, +}; + +struct uncore_global_discovery { + union { + u64 table1; + struct { + u64 type : 8, + stride : 8, + max_units : 10, + __reserved_1 : 36, + access_type : 2; + }; + }; + + u64 ctl; /* Global Control Address */ + + union { + u64 table3; + struct { + u64 status_offset : 8, + num_status : 16, + __reserved_2 : 40; + }; + }; +}; + +struct uncore_unit_discovery { + union { + u64 table1; + struct { + u64 num_regs : 8, + ctl_offset : 8, + bit_width : 8, + ctr_offset : 8, + status_offset : 8, + __reserved_1 : 22, + access_type : 2; + }; + }; + + u64 ctl; /* Unit Control Address */ + + union { + u64 table3; + struct { + u64 box_type : 16, + box_id : 16, + __reserved_2 : 32; + }; + }; +}; + +struct intel_uncore_discovery_type { + struct rb_node node; + enum uncore_access_type access_type; + u64 box_ctrl; /* Unit ctrl addr of the first box */ + u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ + u16 type; /* Type ID of the uncore block */ + u8 num_counters; + u8 counter_width; + u8 ctl_offset; /* Counter Control 0 offset */ + u8 ctr_offset; /* Counter 0 offset */ + u16 num_boxes; /* number of boxes for the uncore block */ + unsigned int *ids; /* Box IDs */ + unsigned int *box_offset; /* Box offset */ +}; + +bool intel_uncore_has_discovery_tables(void); +void intel_uncore_clear_discovery_tables(void); +void intel_uncore_generic_uncore_cpu_init(void); +int intel_uncore_generic_uncore_pci_init(void); +void intel_uncore_generic_uncore_mmio_init(void); + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box); + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event); + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event); + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c new file mode 100644 index 000000000..173e2674b --- /dev/null +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -0,0 +1,1228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Nehalem-EX/Westmere-EX uncore support */ +#include "uncore.h" + +/* NHM-EX event control */ +#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff +#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 +#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) +#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) +#define NHMEX_PMON_CTL_PMI_EN (1 << 20) +#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) +#define NHMEX_PMON_CTL_INVERT (1 << 23) +#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 +#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_UMASK_MASK | \ + NHMEX_PMON_CTL_EDGE_DET | \ + NHMEX_PMON_CTL_INVERT | \ + NHMEX_PMON_CTL_TRESH_MASK) + +/* NHM-EX Ubox */ +#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define NHMEX_U_MSR_PMON_CTR 0xc11 +#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 + +#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) +#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e +#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) +#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) +#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) + +#define NHMEX_U_PMON_RAW_EVENT_MASK \ + (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_EDGE_DET) + +/* NHM-EX Cbox */ +#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 +#define NHMEX_C0_MSR_PMON_CTR0 0xd11 +#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 +#define NHMEX_C_MSR_OFFSET 0x20 + +/* NHM-EX Bbox */ +#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 +#define NHMEX_B0_MSR_PMON_CTR0 0xc31 +#define NHMEX_B0_MSR_PMON_CTL0 0xc30 +#define NHMEX_B_MSR_OFFSET 0x40 +#define NHMEX_B0_MSR_MATCH 0xe45 +#define NHMEX_B0_MSR_MASK 0xe46 +#define NHMEX_B1_MSR_MATCH 0xe4d +#define NHMEX_B1_MSR_MASK 0xe4e + +#define NHMEX_B_PMON_CTL_EN (1 << 0) +#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_B_PMON_CTR_SHIFT 6 +#define NHMEX_B_PMON_CTR_MASK \ + (0x3 << NHMEX_B_PMON_CTR_SHIFT) +#define NHMEX_B_PMON_RAW_EVENT_MASK \ + (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ + NHMEX_B_PMON_CTR_MASK) + +/* NHM-EX Sbox */ +#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 +#define NHMEX_S0_MSR_PMON_CTR0 0xc51 +#define NHMEX_S0_MSR_PMON_CTL0 0xc50 +#define NHMEX_S_MSR_OFFSET 0x80 +#define NHMEX_S0_MSR_MM_CFG 0xe48 +#define NHMEX_S0_MSR_MATCH 0xe49 +#define NHMEX_S0_MSR_MASK 0xe4a +#define NHMEX_S1_MSR_MM_CFG 0xe58 +#define NHMEX_S1_MSR_MATCH 0xe59 +#define NHMEX_S1_MSR_MASK 0xe5a + +#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) +#define NHMEX_S_EVENT_TO_R_PROG_EV 0 + +/* NHM-EX Mbox */ +#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 +#define NHMEX_M0_MSR_PMU_DSP 0xca5 +#define NHMEX_M0_MSR_PMU_ISS 0xca6 +#define NHMEX_M0_MSR_PMU_MAP 0xca7 +#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 +#define NHMEX_M0_MSR_PMU_PGT 0xca9 +#define NHMEX_M0_MSR_PMU_PLD 0xcaa +#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab +#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 +#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 +#define NHMEX_M_MSR_OFFSET 0x40 +#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 +#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c + +#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) +#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL +#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL +#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 + +#define NHMEX_M_PMON_CTL_EN (1 << 0) +#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) +#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 +#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 +#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) +#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) +#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 +#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ + (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ + (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) +#define NHMEX_M_PMON_RAW_EVENT_MASK \ + (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ + NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ + NHMEX_M_PMON_CTL_WRAP_MODE | \ + NHMEX_M_PMON_CTL_FLAG_MODE | \ + NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) + +#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) + +#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) +#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) + +/* + * use the 9~13 bits to select event If the 7th bit is not set, + * otherwise use the 19~21 bits to select event. + */ +#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_EXTAR_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) +#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_SET_FLAG_SEL_MASK, \ + (u64)-1, NHMEX_M_##r) + +/* NHM-EX Rbox */ +#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 +#define NHMEX_R_MSR_PMON_CTL0 0xe10 +#define NHMEX_R_MSR_PMON_CNT0 0xe11 +#define NHMEX_R_MSR_OFFSET 0x20 + +#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ + ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) +#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ + (((n) < 4 ? 0 : 0x10) + (n) * 4) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ + (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ + (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) + +#define NHMEX_R_PMON_CTL_EN (1 << 0) +#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) +#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK + +/* NHM-EX Wbox */ +#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 +#define NHMEX_W_MSR_PMON_CNT0 0xc90 +#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 +#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 +#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 + +#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); +DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); + +static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); +} + +static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); +} + +static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config &= ~((1ULL << uncore_num_counters(box)) - 1); + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config |= (1ULL << uncore_num_counters(box)) - 1; + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx == UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + else + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +#define NHMEX_UNCORE_OPS_COMMON_INIT() \ + .init_box = nhmex_uncore_msr_init_box, \ + .exit_box = nhmex_uncore_msr_exit_box, \ + .disable_box = nhmex_uncore_msr_disable_box, \ + .enable_box = nhmex_uncore_msr_enable_box, \ + .disable_event = nhmex_uncore_msr_disable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops nhmex_uncore_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_uncore_msr_enable_event, +}; + +static struct attribute *nhmex_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_edge.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_ubox_format_group = { + .name = "format", + .attrs = nhmex_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_ubox = { + .name = "ubox", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, + .perf_ctr = NHMEX_U_MSR_PMON_CTR, + .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_ubox_format_group +}; + +static struct attribute *nhmex_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_cbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_cbox_formats_attr, +}; + +/* msr offset for each instance of cbox */ +static unsigned nhmex_cbox_msr_offsets[] = { + 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, +}; + +static struct intel_uncore_type nhmex_uncore_cbox = { + .name = "cbox", + .num_counters = 6, + .num_boxes = 10, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, + .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, + .msr_offsets = nhmex_cbox_msr_offsets, + .pair_ctr_ctl = 1, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static struct uncore_event_desc nhmex_uncore_wbox_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type nhmex_uncore_wbox = { + .name = "wbox", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_W_MSR_PMON_CNT0, + .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, + .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, + .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, + .pair_ctr_ctl = 1, + .event_descs = nhmex_uncore_wbox_events, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int ctr, ev_sel; + + ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> + NHMEX_B_PMON_CTR_SHIFT; + ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> + NHMEX_B_PMON_CTL_EV_SEL_SHIFT; + + /* events that do not use the match/mask registers */ + if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || + (ctr == 2 && ev_sel != 0x4) || ctr == 3) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_B0_MSR_MATCH; + else + reg1->reg = NHMEX_B1_MSR_MATCH; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, reg2->config); + } + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); +} + +/* + * The Bbox has 4 counters, but each counter monitors different events. + * Use bits 6-7 in the event config to select counter. + */ +static struct event_constraint nhmex_uncore_bbox_constraints[] = { + EVENT_CONSTRAINT(0 , 1, 0xc0), + EVENT_CONSTRAINT(0x40, 2, 0xc0), + EVENT_CONSTRAINT(0x80, 4, 0xc0), + EVENT_CONSTRAINT(0xc0, 8, 0xc0), + EVENT_CONSTRAINT_END, +}; + +static struct attribute *nhmex_uncore_bbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_counter.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_bbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_bbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_bbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_bbox_msr_enable_event, + .hw_config = nhmex_bbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_bbox = { + .name = "bbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_B0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, + .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_B_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .constraints = nhmex_uncore_bbox_constraints, + .ops = &nhmex_uncore_bbox_ops, + .format_group = &nhmex_uncore_bbox_format_group +}; + +static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + /* only TO_R_PROG_EV event uses the match/mask register */ + if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != + NHMEX_S_EVENT_TO_R_PROG_EV) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_S0_MSR_MM_CFG; + else + reg1->reg = NHMEX_S1_MSR_MM_CFG; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, 0); + wrmsrl(reg1->reg + 1, reg1->config); + wrmsrl(reg1->reg + 2, reg2->config); + wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + } + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); +} + +static struct attribute *nhmex_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_sbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_sbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_sbox_msr_enable_event, + .hw_config = nhmex_sbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_S0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_S_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .ops = &nhmex_uncore_sbox_ops, + .format_group = &nhmex_uncore_sbox_format_group +}; + +enum { + EXTRA_REG_NHMEX_M_FILTER, + EXTRA_REG_NHMEX_M_DSP, + EXTRA_REG_NHMEX_M_ISS, + EXTRA_REG_NHMEX_M_MAP, + EXTRA_REG_NHMEX_M_MSC_THR, + EXTRA_REG_NHMEX_M_PGT, + EXTRA_REG_NHMEX_M_PLD, + EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, +}; + +static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { + MBOX_INC_SEL_EXTAR_REG(0x0, DSP), + MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x9, ISS), + /* event 0xa uses two extra registers */ + MBOX_INC_SEL_EXTAR_REG(0xa, ISS), + MBOX_INC_SEL_EXTAR_REG(0xa, PLD), + MBOX_INC_SEL_EXTAR_REG(0xb, PLD), + /* events 0xd ~ 0x10 use the same extra register */ + MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x16, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), + EVENT_EXTRA_END +}; + +/* Nehalem-EX or Westmere-EX ? */ +static bool uncore_nhmex; + +static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + bool ret = false; + u64 mask; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config == config) { + atomic_inc(&er->ref); + er->config = config; + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; + } + /* + * The ZDP_CTL_FVC MSR has 4 fields which are used to control + * events 0xd ~ 0x10. Besides these 4 fields, there are additional + * fields which are shared. + */ + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (WARN_ON_ONCE(idx >= 4)) + return false; + + /* mask of the shared fields */ + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + + raw_spin_lock_irqsave(&er->lock, flags); + /* add mask of the non-shared field if it's in use */ + if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { + if (uncore_nhmex) + mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + } + + if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | + NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | + WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + er->config &= ~mask; + er->config |= (config & mask); + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; +} + +static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + atomic_dec(&er->ref); + return; + } + + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + atomic_sub(1 << (idx * 8), &er->ref); +} + +static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 config = reg1->config; + + /* get the non-shared control bits and shift them */ + idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (uncore_nhmex) + config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + if (new_idx > orig_idx) { + idx = new_idx - orig_idx; + config <<= 3 * idx; + } else { + idx = orig_idx - new_idx; + config >>= 3 * idx; + } + + /* add the shared control bits back */ + if (uncore_nhmex) + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + else + config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + if (modify) { + /* adjust the main event selector */ + if (new_idx > orig_idx) + hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + else + hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + reg1->config = config; + reg1->idx = ~0xff | new_idx; + } + return config; +} + +static struct event_constraint * +nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int i, idx[2], alloc = 0; + u64 config1 = reg1->config; + + idx[0] = __BITS_VALUE(reg1->idx, 0, 8); + idx[1] = __BITS_VALUE(reg1->idx, 1, 8); +again: + for (i = 0; i < 2; i++) { + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + idx[i] = 0xff; + + if (idx[i] == 0xff) + continue; + + if (!nhmex_mbox_get_shared_reg(box, idx[i], + __BITS_VALUE(config1, i, 32))) + goto fail; + alloc |= (0x1 << i); + } + + /* for the match/mask registers */ + if (reg2->idx != EXTRA_REG_NONE && + (uncore_box_is_fake(box) || !reg2->alloc) && + !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) + goto fail; + + /* + * If it's a fake box -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + */ + if (!uncore_box_is_fake(box)) { + if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) + nhmex_mbox_alter_er(event, idx[0], true); + reg1->alloc |= alloc; + if (reg2->idx != EXTRA_REG_NONE) + reg2->alloc = 1; + } + return NULL; +fail: + if (idx[0] != 0xff && !(alloc & 0x1) && + idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + /* + * events 0xd ~ 0x10 are functional identical, but are + * controlled by different fields in the ZDP_CTL_FVC + * register. If we failed to take one field, try the + * rest 3 choices. + */ + BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); + idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + idx[0] = (idx[0] + 1) % 4; + idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { + config1 = nhmex_mbox_alter_er(event, idx[0], false); + goto again; + } + } + + if (alloc & 0x1) + nhmex_mbox_put_shared_reg(box, idx[0]); + if (alloc & 0x2) + nhmex_mbox_put_shared_reg(box, idx[1]); + return &uncore_constraint_empty; +} + +static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (uncore_box_is_fake(box)) + return; + + if (reg1->alloc & 0x1) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); + if (reg1->alloc & 0x2) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); + reg1->alloc = 0; + + if (reg2->alloc) { + nhmex_mbox_put_shared_reg(box, reg2->idx); + reg2->alloc = 0; + } +} + +static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) +{ + if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return er->idx; + return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; +} + +static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + struct extra_reg *er; + unsigned msr; + int reg_idx = 0; + /* + * The mbox events may require 2 extra MSRs at the most. But only + * the lower 32 bits in these MSRs are significant, so we can use + * config1 to pass two MSRs' config. + */ + for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + + msr = er->msr + type->msr_offset * box->pmu->pmu_idx; + if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) + return -EINVAL; + + /* always use the 32~63 bits to pass the PLD config */ + if (er->idx == EXTRA_REG_NHMEX_M_PLD) + reg_idx = 1; + else if (WARN_ON_ONCE(reg_idx > 0)) + return -EINVAL; + + reg1->idx &= ~(0xff << (reg_idx * 8)); + reg1->reg &= ~(0xffff << (reg_idx * 16)); + reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); + reg1->reg |= msr << (reg_idx * 16); + reg1->config = event->attr.config1; + reg_idx++; + } + /* + * The mbox only provides ability to perform address matching + * for the PLD events. + */ + if (reg_idx == 2) { + reg2->idx = EXTRA_REG_NHMEX_M_FILTER; + if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) + reg2->config = event->attr.config2; + else + reg2->config = ~0ULL; + if (box->pmu->pmu_idx == 0) + reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; + else + reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; + } + return 0; +} + +static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return box->shared_regs[idx].config; + + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + return config; +} + +static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx; + + idx = __BITS_VALUE(reg1->idx, 0, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + nhmex_mbox_shared_reg_config(box, idx)); + idx = __BITS_VALUE(reg1->idx, 1, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + nhmex_mbox_shared_reg_config(box, idx)); + + if (reg2->idx != EXTRA_REG_NONE) { + wrmsrl(reg2->reg, 0); + if (reg2->config != ~0ULL) { + wrmsrl(reg2->reg + 1, + reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); + wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); + wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + } + } + + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); +DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); +DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); +DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); +DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); +DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); +DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); + +static struct attribute *nhmex_uncore_mbox_formats_attr[] = { + &format_attr_count_mode.attr, + &format_attr_storage_mode.attr, + &format_attr_wrap_mode.attr, + &format_attr_flag_mode.attr, + &format_attr_inc_sel.attr, + &format_attr_set_flag_sel.attr, + &format_attr_filter_cfg_en.attr, + &format_attr_filter_match.attr, + &format_attr_filter_mask.attr, + &format_attr_dsp.attr, + &format_attr_thr.attr, + &format_attr_fvc.attr, + &format_attr_pgt.attr, + &format_attr_map.attr, + &format_attr_iss.attr, + &format_attr_pld.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_mbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_mbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc wsmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_mbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_mbox_msr_enable_event, + .hw_config = nhmex_mbox_hw_config, + .get_constraint = nhmex_mbox_get_constraint, + .put_constraint = nhmex_mbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_mbox = { + .name = "mbox", + .num_counters = 6, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_M0_MSR_PMU_CTL0, + .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, + .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_M_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 8, + .event_descs = nhmex_uncore_mbox_events, + .ops = &nhmex_uncore_mbox_ops, + .format_group = &nhmex_uncore_mbox_format_group, +}; + +static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + /* adjust the main event selector and extra register index */ + if (reg1->idx % 2) { + reg1->idx--; + hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } else { + reg1->idx++; + hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } + + /* adjust extra register config */ + switch (reg1->idx % 6) { + case 2: + /* shift the 8~15 bits to the 0~7 bits */ + reg1->config >>= 8; + break; + case 3: + /* shift the 0~7 bits to the 8~15 bits */ + reg1->config <<= 8; + break; + } +} + +/* + * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. + * An event set consists of 6 events, the 3rd and 4th events in + * an event set use the same extra register. So an event set uses + * 5 extra registers. + */ +static struct event_constraint * +nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + struct intel_uncore_extra_reg *er; + unsigned long flags; + int idx, er_idx; + u64 config1; + bool ok = false; + + if (!uncore_box_is_fake(box) && reg1->alloc) + return NULL; + + idx = reg1->idx % 6; + config1 = reg1->config; +again: + er_idx = idx; + /* the 3rd and 4th events use the same extra register */ + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (idx < 2) { + if (!atomic_read(&er->ref) || er->config == reg1->config) { + atomic_inc(&er->ref); + er->config = reg1->config; + ok = true; + } + } else if (idx == 2 || idx == 3) { + /* + * these two events use different fields in a extra register, + * the 0~7 bits and the 8~15 bits respectively. + */ + u64 mask = 0xff << ((idx - 2) * 8); + if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || + !((er->config ^ config1) & mask)) { + atomic_add(1 << ((idx - 2) * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + } else { + if (!atomic_read(&er->ref) || + (er->config == (hwc->config >> 32) && + er->config1 == reg1->config && + er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config = (hwc->config >> 32); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + /* + * The Rbox events are always in pairs. The paired + * events are functional identical, but use different + * extra registers. If we failed to take an extra + * register, try the alternative. + */ + idx ^= 1; + if (idx != reg1->idx % 6) { + if (idx == 2) + config1 >>= 8; + else if (idx == 3) + config1 <<= 8; + goto again; + } + } else { + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx % 6) + nhmex_rbox_alter_er(box, event); + reg1->alloc = 1; + } + return NULL; + } + return &uncore_constraint_empty; +} + +static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + int idx, er_idx; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + if (idx == 2 || idx == 3) + atomic_sub(1 << ((idx - 2) * 8), &er->ref); + else + atomic_dec(&er->ref); + + reg1->alloc = 0; +} + +static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int idx; + + idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> + NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + if (idx >= 0x18) + return -EINVAL; + + reg1->idx = idx; + reg1->config = event->attr.config1; + + switch (idx % 6) { + case 4: + case 5: + hwc->config |= event->attr.config & (~0ULL << 32); + reg2->config = event->attr.config2; + break; + } + return 0; +} + +static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx, port; + + idx = reg1->idx; + port = idx / 6 + box->pmu->pmu_idx * 4; + + switch (idx % 6) { + case 0: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + break; + case 1: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + break; + case 2: + case 3: + wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); + break; + case 4: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + break; + case 5: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + break; + } + + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); +} + +DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); +DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); + +static struct attribute *nhmex_uncore_rbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_xbr_mm_cfg.attr, + &format_attr_xbr_match.attr, + &format_attr_xbr_mask.attr, + &format_attr_qlx_cfg.attr, + &format_attr_iperf_cfg.attr, + NULL, +}; + +static const struct attribute_group nhmex_uncore_rbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_rbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_rbox_events[] = { + INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), + INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_rbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_rbox_msr_enable_event, + .hw_config = nhmex_rbox_hw_config, + .get_constraint = nhmex_rbox_get_constraint, + .put_constraint = nhmex_rbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_rbox = { + .name = "rbox", + .num_counters = 8, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_R_MSR_PMON_CTL0, + .perf_ctr = NHMEX_R_MSR_PMON_CNT0, + .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_R_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 20, + .event_descs = nhmex_uncore_rbox_events, + .ops = &nhmex_uncore_rbox_ops, + .format_group = &nhmex_uncore_rbox_format_group +}; + +static struct intel_uncore_type *nhmex_msr_uncores[] = { + &nhmex_uncore_ubox, + &nhmex_uncore_cbox, + &nhmex_uncore_bbox, + &nhmex_uncore_sbox, + &nhmex_uncore_mbox, + &nhmex_uncore_rbox, + &nhmex_uncore_wbox, + NULL, +}; + +void nhmex_uncore_cpu_init(void) +{ + if (boot_cpu_data.x86_model == 46) + uncore_nhmex = true; + else + nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; + if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = nhmex_msr_uncores; +} +/* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c new file mode 100644 index 000000000..7fd4334e1 --- /dev/null +++ b/arch/x86/events/intel/uncore_snb.c @@ -0,0 +1,1705 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ +#include "uncore.h" +#include "uncore_discovery.h" + +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC 0x1918 +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910 +#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918 +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d +#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 +#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 +#define PCI_DEVICE_ID_INTEL_CML_H1_IMC 0x9b44 +#define PCI_DEVICE_ID_INTEL_CML_H2_IMC 0x9b54 +#define PCI_DEVICE_ID_INTEL_CML_H3_IMC 0x9b64 +#define PCI_DEVICE_ID_INTEL_CML_U1_IMC 0x9b51 +#define PCI_DEVICE_ID_INTEL_CML_U2_IMC 0x9b61 +#define PCI_DEVICE_ID_INTEL_CML_U3_IMC 0x9b71 +#define PCI_DEVICE_ID_INTEL_CML_S1_IMC 0x9b33 +#define PCI_DEVICE_ID_INTEL_CML_S2_IMC 0x9b43 +#define PCI_DEVICE_ID_INTEL_CML_S3_IMC 0x9b53 +#define PCI_DEVICE_ID_INTEL_CML_S4_IMC 0x9b63 +#define PCI_DEVICE_ID_INTEL_CML_S5_IMC 0x9b73 +#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 +#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 +#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02 +#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04 +#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12 +#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14 +#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36 +#define PCI_DEVICE_ID_INTEL_RKL_1_IMC 0x4c43 +#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53 +#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660 +#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641 +#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601 +#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602 +#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609 +#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a +#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621 +#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623 +#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629 +#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637 +#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b +#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648 +#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649 +#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 +#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 +#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 +#define PCI_DEVICE_ID_INTEL_ADL_17_IMC 0x4614 +#define PCI_DEVICE_ID_INTEL_ADL_18_IMC 0x4617 +#define PCI_DEVICE_ID_INTEL_ADL_19_IMC 0x4618 +#define PCI_DEVICE_ID_INTEL_ADL_20_IMC 0x461B +#define PCI_DEVICE_ID_INTEL_ADL_21_IMC 0x461C +#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700 +#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702 +#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709 +#define PCI_DEVICE_ID_INTEL_RPL_5_IMC 0xA701 +#define PCI_DEVICE_ID_INTEL_RPL_6_IMC 0xA703 +#define PCI_DEVICE_ID_INTEL_RPL_7_IMC 0xA704 +#define PCI_DEVICE_ID_INTEL_RPL_8_IMC 0xA705 +#define PCI_DEVICE_ID_INTEL_RPL_9_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_10_IMC 0xA707 +#define PCI_DEVICE_ID_INTEL_RPL_11_IMC 0xA708 +#define PCI_DEVICE_ID_INTEL_RPL_12_IMC 0xA709 +#define PCI_DEVICE_ID_INTEL_RPL_13_IMC 0xA70a +#define PCI_DEVICE_ID_INTEL_RPL_14_IMC 0xA70b +#define PCI_DEVICE_ID_INTEL_RPL_15_IMC 0xA715 +#define PCI_DEVICE_ID_INTEL_RPL_16_IMC 0xA716 +#define PCI_DEVICE_ID_INTEL_RPL_17_IMC 0xA717 +#define PCI_DEVICE_ID_INTEL_RPL_18_IMC 0xA718 +#define PCI_DEVICE_ID_INTEL_RPL_19_IMC 0xA719 +#define PCI_DEVICE_ID_INTEL_RPL_20_IMC 0xA71A +#define PCI_DEVICE_ID_INTEL_RPL_21_IMC 0xA71B +#define PCI_DEVICE_ID_INTEL_RPL_22_IMC 0xA71C +#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728 +#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729 +#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A +#define PCI_DEVICE_ID_INTEL_MTL_1_IMC 0x7d00 +#define PCI_DEVICE_ID_INTEL_MTL_2_IMC 0x7d01 +#define PCI_DEVICE_ID_INTEL_MTL_3_IMC 0x7d02 +#define PCI_DEVICE_ID_INTEL_MTL_4_IMC 0x7d05 +#define PCI_DEVICE_ID_INTEL_MTL_5_IMC 0x7d10 +#define PCI_DEVICE_ID_INTEL_MTL_6_IMC 0x7d14 +#define PCI_DEVICE_ID_INTEL_MTL_7_IMC 0x7d15 +#define PCI_DEVICE_ID_INTEL_MTL_8_IMC 0x7d16 +#define PCI_DEVICE_ID_INTEL_MTL_9_IMC 0x7d21 +#define PCI_DEVICE_ID_INTEL_MTL_10_IMC 0x7d22 +#define PCI_DEVICE_ID_INTEL_MTL_11_IMC 0x7d23 +#define PCI_DEVICE_ID_INTEL_MTL_12_IMC 0x7d24 +#define PCI_DEVICE_ID_INTEL_MTL_13_IMC 0x7d28 + + +#define IMC_UNCORE_DEV(a) \ +{ \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_##a##_IMC), \ + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), \ +} + +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff +#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET (1 << 18) +#define SNB_UNC_CTL_EN (1 << 22) +#define SNB_UNC_CTL_INVERT (1 << 23) +#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK 0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL 0x391 +#define SNB_UNC_FIXED_CTR_CTRL 0x394 +#define SNB_UNC_FIXED_CTR 0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 +#define SNB_UNC_CBO_0_PER_CTR0 0x706 +#define SNB_UNC_CBO_MSR_OFFSET 0x10 + +/* SNB ARB register */ +#define SNB_UNC_ARB_PER_CTR0 0x3b0 +#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2 +#define SNB_UNC_ARB_MSR_OFFSET 0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL 0x391 +#define NHM_UNC_FIXED_CTR 0x394 +#define NHM_UNC_FIXED_CTR_CTRL 0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0 0x3c0 +#define NHM_UNC_UNCORE_PMC0 0x3b0 + +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + +/* ICL Cbo register */ +#define ICL_UNC_CBO_CONFIG 0x396 +#define ICL_UNC_NUM_CBO_MASK 0xf +#define ICL_UNC_CBO_0_PER_CTR0 0x702 +#define ICL_UNC_CBO_MSR_OFFSET 0x8 + +/* ICL ARB register */ +#define ICL_UNC_ARB_PER_CTR 0x3b1 +#define ICL_UNC_ARB_PERFEVTSEL 0x3b3 + +/* ADL uncore global control */ +#define ADL_UNC_PERF_GLOBAL_CTL 0x2ff0 +#define ADL_UNC_FIXED_CTR_CTRL 0x2fde +#define ADL_UNC_FIXED_CTR 0x2fdf + +/* ADL Cbo register */ +#define ADL_UNC_CBO_0_PER_CTR0 0x2002 +#define ADL_UNC_CBO_0_PERFEVTSEL0 0x2000 +#define ADL_UNC_CTL_THRESHOLD 0x3f000000 +#define ADL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + ADL_UNC_CTL_THRESHOLD) + +/* ADL ARB register */ +#define ADL_UNC_ARB_PER_CTR0 0x2FD2 +#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0 +#define ADL_UNC_ARB_MSR_OFFSET 0x8 + +/* MTL Cbo register */ +#define MTL_UNC_CBO_0_PER_CTR0 0x2448 +#define MTL_UNC_CBO_0_PERFEVTSEL0 0x2442 + +/* MTL HAC_ARB register */ +#define MTL_UNC_HAC_ARB_CTR 0x2018 +#define MTL_UNC_HAC_ARB_CTRL 0x2012 + +/* MTL ARB register */ +#define MTL_UNC_ARB_CTR 0x2418 +#define MTL_UNC_ARB_CTRL 0x2412 + +/* MTL cNCU register */ +#define MTL_UNC_CNCU_FIXED_CTR 0x2408 +#define MTL_UNC_CNCU_FIXED_CTRL 0x2402 +#define MTL_UNC_CNCU_BOX_CTL 0x240e + +/* MTL sNCU register */ +#define MTL_UNC_SNCU_FIXED_CTR 0x2008 +#define MTL_UNC_SNCU_FIXED_CTRL 0x2002 +#define MTL_UNC_SNCU_BOX_CTL 0x200e + +/* MTL HAC_CBO register */ +#define MTL_UNC_HBO_CTR 0x2048 +#define MTL_UNC_HBO_CTRL 0x2042 + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29"); + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void snb_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); +} + +static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + +static struct attribute *snb_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask5.attr, + NULL, +}; + +static const struct attribute_group snb_uncore_format_group = { + .name = "format", + .attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { + .init_box = snb_uncore_msr_init_box, + .enable_box = snb_uncore_msr_enable_box, + .exit_box = snb_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_arb_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x80, 0x1), + UNCORE_EVENT_CONSTRAINT(0x83, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .num_boxes = 4, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &snb_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type snb_uncore_arb = { + .name = "arb", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .perf_ctr = SNB_UNC_ARB_PER_CTR0, + .event_ctl = SNB_UNC_ARB_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_ARB_MSR_OFFSET, + .constraints = snb_uncore_arb_constraints, + .ops = &snb_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { + &snb_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void snb_uncore_cpu_init(void) +{ + uncore_msr_uncores = snb_msr_uncores; + if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; +} + +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } + + /* The 8th CBOX has different MSR space */ + if (box->pmu->pmu_idx == 7) + __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags); +} + +static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .enable_box = skl_uncore_msr_enable_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + +static struct intel_uncore_ops icl_uncore_msr_ops = { + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type icl_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = ICL_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct uncore_event_desc icl_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"), + { /* end: all zeroes */ }, +}; + +static struct attribute *icl_uncore_clock_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group icl_uncore_clock_format_group = { + .name = "format", + .attrs = icl_uncore_clock_formats_attr, +}; + +static struct intel_uncore_type icl_uncore_clockbox = { + .name = "clock", + .num_counters = 1, + .num_boxes = 1, + .fixed_ctr_bits = 48, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &icl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type icl_uncore_arb = { + .name = "arb", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_ARB_PER_CTR, + .event_ctl = ICL_UNC_ARB_PERFEVTSEL, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .ops = &icl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct intel_uncore_type *icl_msr_uncores[] = { + &icl_uncore_cbox, + &icl_uncore_arb, + &icl_uncore_clockbox, + NULL, +}; + +static int icl_get_cbox_num(void) +{ + u64 num_boxes; + + rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes); + + return num_boxes & ICL_UNC_NUM_CBO_MASK; +} + +void icl_uncore_cpu_init(void) +{ + uncore_msr_uncores = icl_msr_uncores; + icl_uncore_cbox.num_boxes = icl_get_cbox_num(); +} + +static struct intel_uncore_type *tgl_msr_uncores[] = { + &icl_uncore_cbox, + &snb_uncore_arb, + &icl_uncore_clockbox, + NULL, +}; + +static void rkl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +void tgl_uncore_cpu_init(void) +{ + uncore_msr_uncores = tgl_msr_uncores; + icl_uncore_cbox.num_boxes = icl_get_cbox_num(); + icl_uncore_cbox.ops = &skl_uncore_msr_ops; + icl_uncore_clockbox.ops = &skl_uncore_msr_ops; + snb_uncore_arb.ops = &skl_uncore_msr_ops; + skl_uncore_msr_ops.init_box = rkl_uncore_msr_init_box; +} + +static void adl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static void adl_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static void adl_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); +} + +static void adl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops adl_uncore_msr_ops = { + .init_box = adl_uncore_msr_init_box, + .enable_box = adl_uncore_msr_enable_box, + .disable_box = adl_uncore_msr_disable_box, + .exit_box = adl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct attribute *adl_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_threshold.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_format_group = { + .name = "format", + .attrs = adl_uncore_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .perf_ctr_bits = 44, + .perf_ctr = ADL_UNC_CBO_0_PER_CTR0, + .event_ctl = ADL_UNC_CBO_0_PERFEVTSEL0, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = ICL_UNC_CBO_MSR_OFFSET, + .ops = &adl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type adl_uncore_arb = { + .name = "arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 44, + .perf_ctr = ADL_UNC_ARB_PER_CTR0, + .event_ctl = ADL_UNC_ARB_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = ADL_UNC_ARB_MSR_OFFSET, + .constraints = snb_uncore_arb_constraints, + .ops = &adl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct intel_uncore_type adl_uncore_clockbox = { + .name = "clock", + .num_counters = 1, + .num_boxes = 1, + .fixed_ctr_bits = 48, + .fixed_ctr = ADL_UNC_FIXED_CTR, + .fixed_ctl = ADL_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &adl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *adl_msr_uncores[] = { + &adl_uncore_cbox, + &adl_uncore_arb, + &adl_uncore_clockbox, + NULL, +}; + +void adl_uncore_cpu_init(void) +{ + adl_uncore_cbox.num_boxes = icl_get_cbox_num(); + uncore_msr_uncores = adl_msr_uncores; +} + +static struct intel_uncore_type mtl_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_CBO_0_PER_CTR0, + .event_ctl = MTL_UNC_CBO_0_PERFEVTSEL0, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_hac_arb = { + .name = "hac_arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_HAC_ARB_CTR, + .event_ctl = MTL_UNC_HAC_ARB_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_arb = { + .name = "arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_ARB_CTR, + .event_ctl = MTL_UNC_ARB_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_hac_cbox = { + .name = "hac_cbox", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_HBO_CTR, + .event_ctl = MTL_UNC_HBO_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static void mtl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); +} + +static struct intel_uncore_ops mtl_uncore_msr_ops = { + .init_box = mtl_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type mtl_uncore_cncu = { + .name = "cncu", + .num_counters = 1, + .num_boxes = 1, + .box_ctl = MTL_UNC_CNCU_BOX_CTL, + .fixed_ctr_bits = 48, + .fixed_ctr = MTL_UNC_CNCU_FIXED_CTR, + .fixed_ctl = MTL_UNC_CNCU_FIXED_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &mtl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type mtl_uncore_sncu = { + .name = "sncu", + .num_counters = 1, + .num_boxes = 1, + .box_ctl = MTL_UNC_SNCU_BOX_CTL, + .fixed_ctr_bits = 48, + .fixed_ctr = MTL_UNC_SNCU_FIXED_CTR, + .fixed_ctl = MTL_UNC_SNCU_FIXED_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &mtl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *mtl_msr_uncores[] = { + &mtl_uncore_cbox, + &mtl_uncore_hac_arb, + &mtl_uncore_arb, + &mtl_uncore_hac_cbox, + &mtl_uncore_cncu, + &mtl_uncore_sncu, + NULL +}; + +void mtl_uncore_cpu_init(void) +{ + mtl_uncore_cbox.num_boxes = icl_get_cbox_num(); + uncore_msr_uncores = mtl_msr_uncores; +} + +enum { + SNB_PCI_UNCORE_IMC, +}; + +static struct uncore_event_desc snb_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"), + INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"), + INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"), + INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"), + + { /* end: all zeroes */ }, +}; + +#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff +#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 + +/* page size multiple covering all config regs */ +#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 + +#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 +#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 +#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE + +/* BW break down- legacy counters */ +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3 +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048 + +enum perf_snb_uncore_imc_freerunning_types { + SNB_PCI_UNCORE_IMC_DATA_READS = 0, + SNB_PCI_UNCORE_IMC_DATA_WRITES, + SNB_PCI_UNCORE_IMC_GT_REQUESTS, + SNB_PCI_UNCORE_IMC_IA_REQUESTS, + SNB_PCI_UNCORE_IMC_IO_REQUESTS, + + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snb_uncore_imc_freerunning[] = { + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, +}; + +static struct attribute *snb_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static const struct attribute_group snb_uncore_imc_format_group = { + .name = "format", + .attrs = snb_uncore_imc_formats_attr, +}; + +static void snb_uncore_imc_init_box(struct intel_uncore_box *box) +{ + struct intel_uncore_type *type = box->pmu->type; + struct pci_dev *pdev = box->pci_dev; + int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; + resource_size_t addr; + u32 pci_dword; + + pci_read_config_dword(pdev, where, &pci_dword); + addr = pci_dword; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, where + 4, &pci_dword); + addr |= ((resource_size_t)pci_dword << 32); +#endif + + addr &= ~(PAGE_SIZE - 1); + + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + + box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; +} + +static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +/* + * Keep the custom event_init() function compatible with old event + * encoding for free running counters. + */ +static int snb_uncore_imc_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; + int idx, base; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + + /* check only supported bits are set */ + if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) + return -EINVAL; + + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + + event->cpu = box->cpu; + event->pmu_private = box; + + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + /* + * check event is known (whitelist, determines counter) + */ + switch (cfg) { + case SNB_UNCORE_PCI_IMC_DATA_READS: + base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_DATA_WRITES: + base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_GT_REQUESTS: + base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IA_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IO_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + default: + return -EINVAL; + } + + /* must be done before validate_group */ + event->hw.event_base = base; + event->hw.idx = idx; + + /* Convert to standard encoding format for freerunning counters */ + event->hw.config = ((cfg - 1) << 8) | 0x10ff; + + /* no group validation needed, we have free running counters */ + + return 0; +} + +static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + return 0; +} + +int snb_pci2phy_map_init(int devid) +{ + struct pci_dev *dev = NULL; + struct pci2phy_map *map; + int bus, segment; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); + if (!dev) + return -ENOTTY; + + bus = dev->bus->number; + segment = pci_domain_nr(dev->bus); + + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + pci_dev_put(dev); + return -ENOMEM; + } + map->pbus_to_dieid[bus] = 0; + raw_spin_unlock(&pci2phy_map_lock); + + pci_dev_put(dev); + + return 0; +} + +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + +static struct pmu snb_uncore_imc_pmu = { + .task_ctx_nr = perf_invalid_context, + .event_init = snb_uncore_imc_event_init, + .add = uncore_pmu_event_add, + .del = uncore_pmu_event_del, + .start = uncore_pmu_event_start, + .stop = uncore_pmu_event_stop, + .read = uncore_pmu_event_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, +}; + +static struct intel_uncore_ops snb_uncore_imc_ops = { + .init_box = snb_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .enable_box = snb_uncore_imc_enable_box, + .disable_box = snb_uncore_imc_disable_box, + .disable_event = snb_uncore_imc_disable_event, + .enable_event = snb_uncore_imc_enable_event, + .hw_config = snb_uncore_imc_hw_config, + .read_counter = snb_uncore_imc_read_counter, +}; + +static struct intel_uncore_type snb_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 1, + .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE, + .freerunning = snb_uncore_imc_freerunning, + .event_descs = snb_uncore_imc_events, + .format_group = &snb_uncore_imc_format_group, + .ops = &snb_uncore_imc_ops, + .pmu = &snb_uncore_imc_pmu, +}; + +static struct intel_uncore_type *snb_pci_uncores[] = { + [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, + NULL, +}; + +static const struct pci_device_id snb_uncore_pci_ids[] = { + IMC_UNCORE_DEV(SNB), + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id ivb_uncore_pci_ids[] = { + IMC_UNCORE_DEV(IVB), + IMC_UNCORE_DEV(IVB_E3), + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id hsw_uncore_pci_ids[] = { + IMC_UNCORE_DEV(HSW), + IMC_UNCORE_DEV(HSW_U), + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id bdw_uncore_pci_ids[] = { + IMC_UNCORE_DEV(BDW), + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id skl_uncore_pci_ids[] = { + IMC_UNCORE_DEV(SKL_Y), + IMC_UNCORE_DEV(SKL_U), + IMC_UNCORE_DEV(SKL_HD), + IMC_UNCORE_DEV(SKL_HQ), + IMC_UNCORE_DEV(SKL_SD), + IMC_UNCORE_DEV(SKL_SQ), + IMC_UNCORE_DEV(SKL_E3), + IMC_UNCORE_DEV(KBL_Y), + IMC_UNCORE_DEV(KBL_U), + IMC_UNCORE_DEV(KBL_UQ), + IMC_UNCORE_DEV(KBL_SD), + IMC_UNCORE_DEV(KBL_SQ), + IMC_UNCORE_DEV(KBL_HQ), + IMC_UNCORE_DEV(KBL_WQ), + IMC_UNCORE_DEV(CFL_2U), + IMC_UNCORE_DEV(CFL_4U), + IMC_UNCORE_DEV(CFL_4H), + IMC_UNCORE_DEV(CFL_6H), + IMC_UNCORE_DEV(CFL_2S_D), + IMC_UNCORE_DEV(CFL_4S_D), + IMC_UNCORE_DEV(CFL_6S_D), + IMC_UNCORE_DEV(CFL_8S_D), + IMC_UNCORE_DEV(CFL_4S_W), + IMC_UNCORE_DEV(CFL_6S_W), + IMC_UNCORE_DEV(CFL_8S_W), + IMC_UNCORE_DEV(CFL_4S_S), + IMC_UNCORE_DEV(CFL_6S_S), + IMC_UNCORE_DEV(CFL_8S_S), + IMC_UNCORE_DEV(AML_YD), + IMC_UNCORE_DEV(AML_YQ), + IMC_UNCORE_DEV(WHL_UQ), + IMC_UNCORE_DEV(WHL_4_UQ), + IMC_UNCORE_DEV(WHL_UD), + IMC_UNCORE_DEV(CML_H1), + IMC_UNCORE_DEV(CML_H2), + IMC_UNCORE_DEV(CML_H3), + IMC_UNCORE_DEV(CML_U1), + IMC_UNCORE_DEV(CML_U2), + IMC_UNCORE_DEV(CML_U3), + IMC_UNCORE_DEV(CML_S1), + IMC_UNCORE_DEV(CML_S2), + IMC_UNCORE_DEV(CML_S3), + IMC_UNCORE_DEV(CML_S4), + IMC_UNCORE_DEV(CML_S5), + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id icl_uncore_pci_ids[] = { + IMC_UNCORE_DEV(ICL_U), + IMC_UNCORE_DEV(ICL_U2), + IMC_UNCORE_DEV(RKL_1), + IMC_UNCORE_DEV(RKL_2), + { /* end: all zeroes */ }, +}; + +static struct pci_driver snb_uncore_pci_driver = { + .name = "snb_uncore", + .id_table = snb_uncore_pci_ids, +}; + +static struct pci_driver ivb_uncore_pci_driver = { + .name = "ivb_uncore", + .id_table = ivb_uncore_pci_ids, +}; + +static struct pci_driver hsw_uncore_pci_driver = { + .name = "hsw_uncore", + .id_table = hsw_uncore_pci_ids, +}; + +static struct pci_driver bdw_uncore_pci_driver = { + .name = "bdw_uncore", + .id_table = bdw_uncore_pci_ids, +}; + +static struct pci_driver skl_uncore_pci_driver = { + .name = "skl_uncore", + .id_table = skl_uncore_pci_ids, +}; + +static struct pci_driver icl_uncore_pci_driver = { + .name = "icl_uncore", + .id_table = icl_uncore_pci_ids, +}; + +struct imc_uncore_pci_dev { + __u32 pci_id; + struct pci_driver *driver; +}; +#define IMC_DEV(a, d) \ + { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) } + +static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { + IMC_DEV(SNB_IMC, &snb_uncore_pci_driver), + IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ + IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ + IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ + IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver), /* Xeon E3 V5 Gen Core processor */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */ + IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */ + IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */ + IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ + IMC_DEV(CML_H1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_H2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_H3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S4_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S5_IMC, &skl_uncore_pci_driver), + IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ + IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ + IMC_DEV(RKL_1_IMC, &icl_uncore_pci_driver), + IMC_DEV(RKL_2_IMC, &icl_uncore_pci_driver), + { /* end marker */ } +}; + + +#define for_each_imc_pci_id(x, t) \ + for (x = (t); (x)->pci_id; x++) + +static struct pci_driver *imc_uncore_find_dev(void) +{ + const struct imc_uncore_pci_dev *p; + int ret; + + for_each_imc_pci_id(p, desktop_imc_pci_ids) { + ret = snb_pci2phy_map_init(p->pci_id); + if (ret == 0) + return p->driver; + } + return NULL; +} + +static int imc_uncore_pci_init(void) +{ + struct pci_driver *imc_drv = imc_uncore_find_dev(); + + if (!imc_drv) + return -ENODEV; + + uncore_pci_uncores = snb_pci_uncores; + uncore_pci_driver = imc_drv; + + return 0; +} + +int snb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +int ivb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} +int hsw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +int bdw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +int skl_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask8.attr, + NULL, +}; + +static const struct attribute_group nhm_uncore_format_group = { + .name = "format", + .attrs = nhm_uncore_formats_attr, +}; + +static struct uncore_event_desc nhm_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { + .disable_box = nhm_uncore_msr_disable_box, + .enable_box = nhm_uncore_msr_enable_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = nhm_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { + .name = "", + .num_counters = 8, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = NHM_UNC_PERFEVTSEL0, + .perf_ctr = NHM_UNC_UNCORE_PMC0, + .fixed_ctr = NHM_UNC_FIXED_CTR, + .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, + .event_mask = NHM_UNC_RAW_EVENT_MASK, + .event_descs = nhm_uncore_events, + .ops = &nhm_uncore_msr_ops, + .format_group = &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { + &nhm_uncore, + NULL, +}; + +void nhm_uncore_cpu_init(void) +{ + uncore_msr_uncores = nhm_msr_uncores; +} + +/* end of Nehalem uncore support */ + +/* Tiger Lake MMIO uncore support */ + +static const struct pci_device_id tgl_uncore_pci_ids[] = { + IMC_UNCORE_DEV(TGL_U1), + IMC_UNCORE_DEV(TGL_U2), + IMC_UNCORE_DEV(TGL_U3), + IMC_UNCORE_DEV(TGL_U4), + IMC_UNCORE_DEV(TGL_H), + IMC_UNCORE_DEV(ADL_1), + IMC_UNCORE_DEV(ADL_2), + IMC_UNCORE_DEV(ADL_3), + IMC_UNCORE_DEV(ADL_4), + IMC_UNCORE_DEV(ADL_5), + IMC_UNCORE_DEV(ADL_6), + IMC_UNCORE_DEV(ADL_7), + IMC_UNCORE_DEV(ADL_8), + IMC_UNCORE_DEV(ADL_9), + IMC_UNCORE_DEV(ADL_10), + IMC_UNCORE_DEV(ADL_11), + IMC_UNCORE_DEV(ADL_12), + IMC_UNCORE_DEV(ADL_13), + IMC_UNCORE_DEV(ADL_14), + IMC_UNCORE_DEV(ADL_15), + IMC_UNCORE_DEV(ADL_16), + IMC_UNCORE_DEV(ADL_17), + IMC_UNCORE_DEV(ADL_18), + IMC_UNCORE_DEV(ADL_19), + IMC_UNCORE_DEV(ADL_20), + IMC_UNCORE_DEV(ADL_21), + IMC_UNCORE_DEV(RPL_1), + IMC_UNCORE_DEV(RPL_2), + IMC_UNCORE_DEV(RPL_3), + IMC_UNCORE_DEV(RPL_4), + IMC_UNCORE_DEV(RPL_5), + IMC_UNCORE_DEV(RPL_6), + IMC_UNCORE_DEV(RPL_7), + IMC_UNCORE_DEV(RPL_8), + IMC_UNCORE_DEV(RPL_9), + IMC_UNCORE_DEV(RPL_10), + IMC_UNCORE_DEV(RPL_11), + IMC_UNCORE_DEV(RPL_12), + IMC_UNCORE_DEV(RPL_13), + IMC_UNCORE_DEV(RPL_14), + IMC_UNCORE_DEV(RPL_15), + IMC_UNCORE_DEV(RPL_16), + IMC_UNCORE_DEV(RPL_17), + IMC_UNCORE_DEV(RPL_18), + IMC_UNCORE_DEV(RPL_19), + IMC_UNCORE_DEV(RPL_20), + IMC_UNCORE_DEV(RPL_21), + IMC_UNCORE_DEV(RPL_22), + IMC_UNCORE_DEV(RPL_23), + IMC_UNCORE_DEV(RPL_24), + IMC_UNCORE_DEV(RPL_25), + IMC_UNCORE_DEV(MTL_1), + IMC_UNCORE_DEV(MTL_2), + IMC_UNCORE_DEV(MTL_3), + IMC_UNCORE_DEV(MTL_4), + IMC_UNCORE_DEV(MTL_5), + IMC_UNCORE_DEV(MTL_6), + IMC_UNCORE_DEV(MTL_7), + IMC_UNCORE_DEV(MTL_8), + IMC_UNCORE_DEV(MTL_9), + IMC_UNCORE_DEV(MTL_10), + IMC_UNCORE_DEV(MTL_11), + IMC_UNCORE_DEV(MTL_12), + IMC_UNCORE_DEV(MTL_13), + { /* end: all zeroes */ } +}; + +enum perf_tgl_uncore_imc_freerunning_types { + TGL_MMIO_UNCORE_IMC_DATA_TOTAL, + TGL_MMIO_UNCORE_IMC_DATA_READ, + TGL_MMIO_UNCORE_IMC_DATA_WRITE, + TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 }, +}; + +static struct freerunning_counters tgl_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 }, +}; + +static struct uncore_event_desc tgl_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"), + + { /* end: all zeroes */ } +}; + +static struct pci_dev *tgl_uncore_get_mc_dev(void) +{ + const struct pci_device_id *ids = tgl_uncore_pci_ids; + struct pci_dev *mc_dev = NULL; + + while (ids && ids->vendor) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL); + if (mc_dev) + return mc_dev; + ids++; + } + + return mc_dev; +} + +#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 +#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 + +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) +{ + struct pci_dev *pdev = tgl_uncore_get_mc_dev(); + struct intel_uncore_pmu *pmu = box->pmu; + struct intel_uncore_type *type = pmu->type; + resource_size_t addr; + u32 mch_bar; + + if (!pdev) { + pr_warn("perf uncore: Cannot find matched IMC device.\n"); + return; + } + + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); + /* MCHBAR is disabled */ + if (!(mch_bar & BIT(0))) { + pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + pci_dev_put(pdev); + return; + } + mch_bar &= ~BIT(0); + addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); + addr |= ((resource_size_t)mch_bar << 32); +#endif + + addr += base_offset; + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + + pci_dev_put(pdev); +} + +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, 0); +} + +static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { + .init_box = tgl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct attribute *tgl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL +}; + +static const struct attribute_group tgl_uncore_imc_format_group = { + .name = "format", + .attrs = tgl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type tgl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = TGL_UNCORE_PCI_IMC_MAP_SIZE, + .freerunning = tgl_uncore_imc_freerunning, + .ops = &tgl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *tgl_mmio_uncores[] = { + &tgl_uncore_imc_free_running, + NULL +}; + +void tgl_l_uncore_mmio_init(void) +{ + tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning; + uncore_mmio_uncores = tgl_mmio_uncores; +} + +void tgl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = tgl_mmio_uncores; +} + +/* end of Tiger Lake MMIO uncore support */ + +/* Alder Lake MMIO uncore support */ +#define ADL_UNCORE_IMC_BASE 0xd900 +#define ADL_UNCORE_IMC_MAP_SIZE 0x200 +#define ADL_UNCORE_IMC_CTR 0xe8 +#define ADL_UNCORE_IMC_CTRL 0xd0 +#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0 +#define ADL_UNCORE_IMC_BOX_CTL 0xc4 +#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800 +#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100 + +#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0) +#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1) +#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2) +#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \ + ADL_UNCORE_IMC_CTL_RST_CTRS) + +static void adl_uncore_imc_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE); + + /* The global control in MC1 can control both MCs. */ + if (box->io_addr && (box->pmu->pmu_idx == 1)) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL); +} + +static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static struct intel_uncore_ops adl_uncore_mmio_ops = { + .init_box = adl_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = adl_uncore_mmio_disable_box, + .enable_box = adl_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 +#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + ADL_UNC_CTL_CHMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET) + +static struct attribute *adl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_chmask.attr, + &format_attr_edge.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_imc_format_group = { + .name = "format", + .attrs = adl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = ADL_UNCORE_IMC_CTR, + .event_ctl = ADL_UNCORE_IMC_CTRL, + .event_mask = ADL_UNC_IMC_EVENT_MASK, + .box_ctl = ADL_UNCORE_IMC_BOX_CTL, + .mmio_offset = 0, + .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE, + .ops = &adl_uncore_mmio_ops, + .format_group = &adl_uncore_imc_format_group, +}; + +enum perf_adl_uncore_imc_freerunning_types { + ADL_MMIO_UNCORE_IMC_DATA_TOTAL, + ADL_MMIO_UNCORE_IMC_DATA_READ, + ADL_MMIO_UNCORE_IMC_DATA_WRITE, + ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters adl_uncore_imc_freerunning[] = { + [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 }, +}; + +static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE); +} + +static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = { + .init_box = adl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type adl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE, + .freerunning = adl_uncore_imc_freerunning, + .ops = &adl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *adl_mmio_uncores[] = { + &adl_uncore_imc, + &adl_uncore_imc_free_running, + NULL +}; + +void adl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = adl_mmio_uncores; +} + +/* end of Alder Lake MMIO uncore support */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c new file mode 100644 index 000000000..9b5859812 --- /dev/null +++ b/arch/x86/events/intel/uncore_snbep.c @@ -0,0 +1,6118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* SandyBridge-EP/IvyTown uncore support */ +#include "uncore.h" +#include "uncore_discovery.h" + +/* SNB-EP pci bus to socket mapping */ +#define SNBEP_CPUNODEID 0x40 +#define SNBEP_GIDNIDMAP 0x54 + +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) +#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS | \ + SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 +#define SNBEP_PMON_CTL_RST (1 << 17) +#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) +#define SNBEP_PMON_CTL_EN (1 << 22) +#define SNBEP_PMON_CTL_INVERT (1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL 0xf4 +#define SNBEP_PCI_PMON_CTL0 0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0 0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0 0xc16 +#define SNBEP_U_MSR_PMON_CTL0 0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0 0xd16 +#define SNBEP_C0_MSR_PMON_CTL0 0xd10 +#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 +#define SNBEP_CBO_MSR_OFFSET 0x20 + +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 + +#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ + .event = (e), \ + .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ + .config_mask = (m), \ + .idx = (i) \ +} + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff +#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd + +/* IVBEP event control */ +#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS) +#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_TRESH_MASK) +/* IVBEP Ubox */ +#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31) +#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) + +#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) +/* IVBEP Cbo */ +#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* IVBEP home agent */ +#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) +#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST) +/* IVBEP PCU */ +#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* IVBEP QPI */ +#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +/* Haswell-EP Ubox */ +#define HSWEP_U_MSR_PMON_CTR0 0x709 +#define HSWEP_U_MSR_PMON_CTL0 0x705 +#define HSWEP_U_MSR_PMON_FILTER 0x707 + +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703 +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704 + +#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0) +#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1) +#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \ + (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \ + HSWEP_U_MSR_PMON_BOX_FILTER_CID) + +/* Haswell-EP CBo */ +#define HSWEP_C0_MSR_PMON_CTR0 0xe08 +#define HSWEP_C0_MSR_PMON_CTL0 0xe01 +#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00 +#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05 +#define HSWEP_CBO_MSR_OFFSET 0x10 + + +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + + +/* Haswell-EP Sbox */ +#define HSWEP_S0_MSR_PMON_CTR0 0x726 +#define HSWEP_S0_MSR_PMON_CTL0 0x721 +#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720 +#define HSWEP_SBOX_MSR_OFFSET 0xa +#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* Haswell-EP PCU */ +#define HSWEP_PCU_MSR_PMON_CTR0 0x717 +#define HSWEP_PCU_MSR_PMON_CTL0 0x711 +#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710 +#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715 + +/* KNL Ubox */ +#define KNL_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) +/* KNL CHA */ +#define KNL_CHA_MSR_OFFSET 0xc +#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16) +#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \ + KNL_CHA_MSR_PMON_CTL_QOR) +#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff +#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18) +#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE (0x1ULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE (0x1ULL << 33) +#define KNL_CHA_MSR_PMON_BOX_FILTER_NNC (0x1ULL << 37) + +/* KNL EDC/MC UCLK */ +#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400 +#define KNL_UCLK_MSR_PMON_CTL0 0x420 +#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430 +#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c +#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454 +#define KNL_PMON_FIXED_CTL_EN 0x1 + +/* KNL EDC */ +#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00 +#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20 +#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30 +#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c +#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44 + +/* KNL MC */ +#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00 +#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20 +#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30 +#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c +#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44 + +/* KNL IRP */ +#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0 +#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + KNL_CHA_MSR_PMON_CTL_QOR) +/* KNL PCU */ +#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f +#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7) +#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000 +#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \ + (KNL_PCU_PMON_CTL_EV_SEL_MASK | \ + KNL_PCU_PMON_CTL_USE_OCC_CTR | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_CBO_PMON_CTL_TID_EN | \ + SNBEP_PMON_CTL_INVERT | \ + KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +/* SKX pci bus to socket mapping */ +#define SKX_CPUNODEID 0xc0 +#define SKX_GIDNIDMAP 0xd4 + +/* + * The CPU_BUS_NUMBER MSR returns the values of the respective CPUBUSNO CSR + * that BIOS programmed. MSR has package scope. + * | Bit | Default | Description + * | [63] | 00h | VALID - When set, indicates the CPU bus + * numbers have been initialized. (RO) + * |[62:48]| --- | Reserved + * |[47:40]| 00h | BUS_NUM_5 - Return the bus number BIOS assigned + * CPUBUSNO(5). (RO) + * |[39:32]| 00h | BUS_NUM_4 - Return the bus number BIOS assigned + * CPUBUSNO(4). (RO) + * |[31:24]| 00h | BUS_NUM_3 - Return the bus number BIOS assigned + * CPUBUSNO(3). (RO) + * |[23:16]| 00h | BUS_NUM_2 - Return the bus number BIOS assigned + * CPUBUSNO(2). (RO) + * |[15:8] | 00h | BUS_NUM_1 - Return the bus number BIOS assigned + * CPUBUSNO(1). (RO) + * | [7:0] | 00h | BUS_NUM_0 - Return the bus number BIOS assigned + * CPUBUSNO(0). (RO) + */ +#define SKX_MSR_CPU_BUS_NUMBER 0x300 +#define SKX_MSR_CPU_BUS_VALID_BIT (1ULL << 63) +#define BUS_NUM_STRIDE 8 + +/* SKX CHA */ +#define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9) +#define SKX_CHA_MSR_PMON_BOX_FILTER_STATE (0x3ffULL << 17) +#define SKX_CHA_MSR_PMON_BOX_FILTER_REM (0x1ULL << 32) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LOC (0x1ULL << 33) +#define SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC (0x1ULL << 35) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NM (0x1ULL << 36) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM (0x1ULL << 37) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC0 (0x3ffULL << 41) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC1 (0x3ffULL << 51) +#define SKX_CHA_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define SKX_CHA_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* SKX IIO */ +#define SKX_IIO0_MSR_PMON_CTL0 0xa48 +#define SKX_IIO0_MSR_PMON_CTR0 0xa41 +#define SKX_IIO0_MSR_PMON_BOX_CTL 0xa40 +#define SKX_IIO_MSR_OFFSET 0x20 + +#define SKX_PMON_CTL_TRESH_MASK (0xff << 24) +#define SKX_PMON_CTL_TRESH_MASK_EXT (0xf) +#define SKX_PMON_CTL_CH_MASK (0xff << 4) +#define SKX_PMON_CTL_FC_MASK (0x7 << 12) +#define SKX_IIO_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SKX_PMON_CTL_TRESH_MASK) +#define SKX_IIO_PMON_RAW_EVENT_MASK_EXT (SKX_PMON_CTL_TRESH_MASK_EXT | \ + SKX_PMON_CTL_CH_MASK | \ + SKX_PMON_CTL_FC_MASK) + +/* SKX IRP */ +#define SKX_IRP0_MSR_PMON_CTL0 0xa5b +#define SKX_IRP0_MSR_PMON_CTR0 0xa59 +#define SKX_IRP0_MSR_PMON_BOX_CTL 0xa58 +#define SKX_IRP_MSR_OFFSET 0x20 + +/* SKX UPI */ +#define SKX_UPI_PCI_PMON_CTL0 0x350 +#define SKX_UPI_PCI_PMON_CTR0 0x318 +#define SKX_UPI_PCI_PMON_BOX_CTL 0x378 +#define SKX_UPI_CTL_UMASK_EXT 0xffefff + +/* SKX M2M */ +#define SKX_M2M_PCI_PMON_CTL0 0x228 +#define SKX_M2M_PCI_PMON_CTR0 0x200 +#define SKX_M2M_PCI_PMON_BOX_CTL 0x258 + +/* Memory Map registers device ID */ +#define SNR_ICX_MESH2IIO_MMAP_DID 0x9a2 +#define SNR_ICX_SAD_CONTROL_CFG 0x3f4 + +/* Getting I/O stack id in SAD_COTROL_CFG notation */ +#define SAD_CONTROL_STACK_ID(data) (((data) >> 4) & 0x7) + +/* SNR Ubox */ +#define SNR_U_MSR_PMON_CTR0 0x1f98 +#define SNR_U_MSR_PMON_CTL0 0x1f91 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94 + +/* SNR CHA */ +#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff +#define SNR_CHA_MSR_PMON_CTL0 0x1c01 +#define SNR_CHA_MSR_PMON_CTR0 0x1c08 +#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00 +#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05 + + +/* SNR IIO */ +#define SNR_IIO_MSR_PMON_CTL0 0x1e08 +#define SNR_IIO_MSR_PMON_CTR0 0x1e01 +#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00 +#define SNR_IIO_MSR_OFFSET 0x10 +#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff + +/* SNR IRP */ +#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8 +#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1 +#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0 +#define SNR_IRP_MSR_OFFSET 0x10 + +/* SNR M2PCIE */ +#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58 +#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51 +#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50 +#define SNR_M2PCIE_MSR_OFFSET 0x10 + +/* SNR PCU */ +#define SNR_PCU_MSR_PMON_CTL0 0x1ef1 +#define SNR_PCU_MSR_PMON_CTR0 0x1ef8 +#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0 +#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc + +/* SNR M2M */ +#define SNR_M2M_PCI_PMON_CTL0 0x468 +#define SNR_M2M_PCI_PMON_CTR0 0x440 +#define SNR_M2M_PCI_PMON_BOX_CTL 0x438 +#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff + +/* SNR PCIE3 */ +#define SNR_PCIE3_PCI_PMON_CTL0 0x508 +#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 +#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0 + +/* SNR IMC */ +#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 +#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 +#define SNR_IMC_MMIO_PMON_CTL0 0x40 +#define SNR_IMC_MMIO_PMON_CTR0 0x8 +#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800 +#define SNR_IMC_MMIO_OFFSET 0x4000 +#define SNR_IMC_MMIO_SIZE 0x4000 +#define SNR_IMC_MMIO_BASE_OFFSET 0xd0 +#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF +#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 +#define SNR_IMC_MMIO_MEM0_MASK 0x7FF + +/* ICX CHA */ +#define ICX_C34_MSR_PMON_CTR0 0xb68 +#define ICX_C34_MSR_PMON_CTL0 0xb61 +#define ICX_C34_MSR_PMON_BOX_CTL 0xb60 +#define ICX_C34_MSR_PMON_BOX_FILTER0 0xb65 + +/* ICX IIO */ +#define ICX_IIO_MSR_PMON_CTL0 0xa58 +#define ICX_IIO_MSR_PMON_CTR0 0xa51 +#define ICX_IIO_MSR_PMON_BOX_CTL 0xa50 + +/* ICX IRP */ +#define ICX_IRP0_MSR_PMON_CTL0 0xa4d +#define ICX_IRP0_MSR_PMON_CTR0 0xa4b +#define ICX_IRP0_MSR_PMON_BOX_CTL 0xa4a + +/* ICX M2PCIE */ +#define ICX_M2PCIE_MSR_PMON_CTL0 0xa46 +#define ICX_M2PCIE_MSR_PMON_CTR0 0xa41 +#define ICX_M2PCIE_MSR_PMON_BOX_CTL 0xa40 + +/* ICX UPI */ +#define ICX_UPI_PCI_PMON_CTL0 0x350 +#define ICX_UPI_PCI_PMON_CTR0 0x320 +#define ICX_UPI_PCI_PMON_BOX_CTL 0x318 +#define ICX_UPI_CTL_UMASK_EXT 0xffffff + +/* ICX M3UPI*/ +#define ICX_M3UPI_PCI_PMON_CTL0 0xd8 +#define ICX_M3UPI_PCI_PMON_CTR0 0xa8 +#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0 + +/* ICX IMC */ +#define ICX_NUMBER_IMC_CHN 3 +#define ICX_IMC_MEM_STRIDE 0x4 + +/* SPR */ +#define SPR_RAW_EVENT_MASK_EXT 0xffffff + +/* SPR CHA */ +#define SPR_CHA_PMON_CTL_TID_EN (1 << 16) +#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff + +#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); +DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); +DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); +DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state5, filter_state, "config1:17-26"); +DEFINE_UNCORE_FORMAT_ATTR(filter_rem, filter_rem, "config1:32"); +DEFINE_UNCORE_FORMAT_ATTR(filter_loc, filter_loc, "config1:33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nm, filter_nm, "config1:36"); +DEFINE_UNCORE_FORMAT_ATTR(filter_not_nm, filter_not_nm, "config1:37"); +DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_0, filter_opc0, "config1:41-50"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_1, filter_opc1, "config1:51-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); +DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); +DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); +DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); +DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); + +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT); +} + +static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config |= SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) + wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct attribute *snbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *snbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_nid.attr, + &format_attr_filter_state.attr, + &format_attr_filter_opc.attr, + NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *snbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), + INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), + INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), + { /* end: all zeroes */ }, +}; + +static const struct attribute_group snbep_uncore_format_group = { + .name = "format", + .attrs = snbep_uncore_formats_attr, +}; + +static const struct attribute_group snbep_uncore_ubox_format_group = { + .name = "format", + .attrs = snbep_uncore_ubox_formats_attr, +}; + +static const struct attribute_group snbep_uncore_cbox_format_group = { + .name = "format", + .attrs = snbep_uncore_cbox_formats_attr, +}; + +static const struct attribute_group snbep_uncore_pcu_format_group = { + .name = "format", + .attrs = snbep_uncore_pcu_formats_attr, +}; + +static const struct attribute_group snbep_uncore_qpi_format_group = { + .name = "format", + .attrs = snbep_uncore_qpi_formats_attr, +}; + +#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \ + .init_box = snbep_uncore_msr_init_box \ + +static struct intel_uncore_ops snbep_uncore_msr_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_pci_init_box, \ + .disable_box = snbep_uncore_pci_disable_box, \ + .enable_box = snbep_uncore_pci_enable_box, \ + .disable_event = snbep_uncore_pci_disable_event, \ + .read_counter = snbep_uncore_pci_read_counter + +static struct intel_uncore_ops snbep_uncore_pci_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_uncore_pci_enable_event, \ +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x04, 0x3), + UNCORE_EVENT_CONSTRAINT(0x05, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x3), + UNCORE_EVENT_CONSTRAINT(0x09, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1f, 0xe), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x30, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_ubox_format_group, +}; + +static struct extra_reg snbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), + EVENT_EXTRA_END +}; + +static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i; + + if (uncore_box_is_fake(box)) + return; + + for (i = 0; i < 5; i++) { + if (reg1->alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + reg1->alloc = 0; +} + +static struct event_constraint * +__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, + u64 (*cbox_filter_mask)(int fields)) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i, alloc = 0; + unsigned long flags; + u64 mask; + + if (reg1->idx == EXTRA_REG_NONE) + return NULL; + + raw_spin_lock_irqsave(&er->lock, flags); + for (i = 0; i < 5; i++) { + if (!(reg1->idx & (0x1 << i))) + continue; + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + continue; + + mask = cbox_filter_mask(0x1 << i); + if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || + !((reg1->config ^ er->config) & mask)) { + atomic_add(1 << (i * 6), &er->ref); + er->config &= ~mask; + er->config |= reg1->config & mask; + alloc |= (0x1 << i); + } else { + break; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + if (i < 5) + goto fail; + + if (!uncore_box_is_fake(box)) + reg1->alloc |= alloc; + + return NULL; +fail: + for (; i >= 0; i--) { + if (alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + return &uncore_constraint_empty; +} + +static u64 snbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x4) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + + return mask; +} + +static struct event_constraint * +snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); +} + +static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_cbox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_cbox_hw_config, + .get_constraint = snbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &snbep_uncore_cbox_ops, + .format_group = &snbep_uncore_cbox_format_group, +}; + +static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 config = reg1->config; + + if (new_idx > reg1->idx) + config <<= 8 * (new_idx - reg1->idx); + else + config >>= 8 * (reg1->idx - new_idx); + + if (modify) { + hwc->config += new_idx - reg1->idx; + reg1->config = config; + reg1->idx = new_idx; + } + return config; +} + +static struct event_constraint * +snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + unsigned long flags; + int idx = reg1->idx; + u64 mask, config1 = reg1->config; + bool ok = false; + + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; +again: + mask = 0xffULL << (idx * 8); + raw_spin_lock_irqsave(&er->lock, flags); + if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || + !((config1 ^ er->config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + idx = (idx + 1) % 4; + if (idx != reg1->idx) { + config1 = snbep_pcu_alter_er(event, idx, false); + goto again; + } + return &uncore_constraint_empty; + } + + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx) + snbep_pcu_alter_er(event, idx, true); + reg1->alloc = 1; + } + return NULL; +} + +static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + atomic_sub(1 << (reg1->idx * 8), &er->ref); + reg1->alloc = 0; +} + +static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8)); + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { + &snbep_uncore_ubox, + &snbep_uncore_cbox, + &snbep_uncore_pcu, + NULL, +}; + +void snbep_uncore_cpu_init(void) +{ + if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = snbep_msr_uncores; +} + +enum { + SNBEP_PCI_QPI_PORT0_FILTER, + SNBEP_PCI_QPI_PORT1_FILTER, + BDX_PCI_QPI_PORT2_FILTER, +}; + +static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { + reg1->idx = 0; + reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; + reg1->config = event->attr.config1; + reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; + reg2->config = event->attr.config2; + } + return 0; +} + +static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; + int die = box->dieid; + struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx]; + + if (filter_pdev) { + pci_write_config_dword(filter_pdev, reg1->reg, + (u32)reg1->config); + pci_write_config_dword(filter_pdev, reg1->reg + 4, + (u32)(reg1->config >> 32)); + pci_write_config_dword(filter_pdev, reg2->reg, + (u32)reg2->config); + pci_write_config_dword(filter_pdev, reg2->reg + 4, + (u32)(reg2->config >> 32)); + } + } + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snbep_uncore_qpi_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_qpi_enable_event, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &snbep_uncore_pci_ops, \ + .format_group = &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .event_descs = snbep_uncore_qpi_events, + .format_group = &snbep_uncore_qpi_format_group, +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + SNBEP_PCI_UNCORE_HA, + SNBEP_PCI_UNCORE_IMC, + SNBEP_PCI_UNCORE_QPI, + SNBEP_PCI_UNCORE_R2PCIE, + SNBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { + [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, + [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, + [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, + [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, + [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id snbep_uncore_pci_ids[] = { + { /* Home Agent */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), + }, + { /* MC Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), + }, + { /* QPI Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { + .name = "snbep_uncore", + .id_table = snbep_uncore_pci_ids, +}; + +#define NODE_ID_MASK 0x7 + +/* + * build pci bus to socket mapping + */ +static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse) +{ + struct pci_dev *ubox_dev = NULL; + int i, bus, nodeid, segment, die_id; + struct pci2phy_map *map; + int err = 0; + u32 config = 0; + + while (1) { + /* find the UBOX device */ + ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); + if (!ubox_dev) + break; + bus = ubox_dev->bus->number; + /* + * The nodeid and idmap registers only contain enough + * information to handle 8 nodes. On systems with more + * than 8 nodes, we need to rely on NUMA information, + * filled in from BIOS supplied information, to determine + * the topology. + */ + if (nr_node_ids <= 8) { + /* get the Node ID of the local register */ + err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); + if (err) + break; + nodeid = config & NODE_ID_MASK; + /* get the Node ID mapping */ + err = pci_read_config_dword(ubox_dev, idmap_loc, &config); + if (err) + break; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + if (topology_max_die_per_package() > 1) + die_id = i; + else + die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; + map->pbus_to_dieid[bus] = die_id; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + } else { + int node = pcibus_to_node(ubox_dev->bus); + int cpu; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + + die_id = -1; + for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + map->pbus_to_dieid[bus] = die_id = c->logical_die_id; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + if (WARN_ON_ONCE(die_id == -1)) { + err = -EINVAL; + break; + } + } + } + + if (!err) { + /* + * For PCI bus with no UBOX device, find the next bus + * that has UBOX device and use its mapping. + */ + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + i = -1; + if (reverse) { + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; + else + map->pbus_to_dieid[bus] = i; + } + } else { + for (bus = 0; bus <= 255; bus++) { + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; + else + map->pbus_to_dieid[bus] = i; + } + } + } + raw_spin_unlock(&pci2phy_map_lock); + } + + pci_dev_put(ubox_dev); + + return err ? pcibios_err_to_errno(err) : 0; +} + +int snbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3ce0, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); + if (ret) + return ret; + uncore_pci_uncores = snbep_pci_uncores; + uncore_pci_driver = &snbep_uncore_pci_driver; + return 0; +} +/* end of Sandy Bridge-EP uncore support */ + +/* IvyTown uncore support */ +static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + if (msr) + wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT); +} + +static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = ivbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops ivbep_uncore_msr_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +static struct intel_uncore_ops ivbep_uncore_pci_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +#define IVBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &ivbep_uncore_pci_ops, \ + .format_group = &ivbep_uncore_format_group + +static struct attribute *ivbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_link.attr, + &format_attr_filter_state2.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static const struct attribute_group ivbep_uncore_format_group = { + .name = "format", + .attrs = ivbep_uncore_formats_attr, +}; + +static const struct attribute_group ivbep_uncore_ubox_format_group = { + .name = "format", + .attrs = ivbep_uncore_ubox_formats_attr, +}; + +static const struct attribute_group ivbep_uncore_cbox_format_group = { + .name = "format", + .attrs = ivbep_uncore_cbox_formats_attr, +}; + +static const struct attribute_group ivbep_uncore_pcu_format_group = { + .name = "format", + .attrs = ivbep_uncore_pcu_formats_attr, +}; + +static const struct attribute_group ivbep_uncore_qpi_format_group = { + .name = "format", + .attrs = ivbep_uncore_qpi_formats_attr, +}; + +static struct intel_uncore_type ivbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct extra_reg ivbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + EVENT_EXTRA_END +}; + +static u64 ivbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + + return mask; +} + +static struct event_constraint * +ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask); +} + +static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 6, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops ivbep_uncore_cbox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = ivbep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = ivbep_cbox_hw_config, + .get_constraint = ivbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 15, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &ivbep_uncore_cbox_ops, + .format_group = &ivbep_uncore_cbox_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_pcu_ops, + .format_group = &ivbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *ivbep_msr_uncores[] = { + &ivbep_uncore_ubox, + &ivbep_uncore_cbox, + &ivbep_uncore_pcu, + NULL, +}; + +void ivbep_uncore_cpu_init(void) +{ + if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = ivbep_msr_uncores; +} + +static struct intel_uncore_type ivbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +/* registers in IRP boxes are not properly aligned */ +static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; +static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; + +static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], + hwc->config | SNBEP_PMON_CTL_EN); +} + +static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config); +} + +static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops ivbep_uncore_irp_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = ivbep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type ivbep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_irp_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_qpi_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_qpi_enable_event, + .read_counter = snbep_uncore_pci_read_counter, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_qpi_ops, + .format_group = &ivbep_uncore_qpi_format_group, +}; + +static struct intel_uncore_type ivbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + IVBEP_PCI_UNCORE_HA, + IVBEP_PCI_UNCORE_IMC, + IVBEP_PCI_UNCORE_IRP, + IVBEP_PCI_UNCORE_QPI, + IVBEP_PCI_UNCORE_R2PCIE, + IVBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *ivbep_pci_uncores[] = { + [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha, + [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc, + [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp, + [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi, + [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie, + [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id ivbep_uncore_pci_ids[] = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver ivbep_uncore_pci_driver = { + .name = "ivbep_uncore", + .id_table = ivbep_uncore_pci_ids, +}; + +int ivbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x0e1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); + if (ret) + return ret; + uncore_pci_uncores = ivbep_pci_uncores; + uncore_pci_driver = &ivbep_uncore_pci_driver; + return 0; +} +/* end of IvyTown uncore support */ + +/* KNL uncore support */ +static struct attribute *knl_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static const struct attribute_group knl_uncore_ubox_format_group = { + .name = "format", + .attrs = knl_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &knl_uncore_ubox_format_group, +}; + +static struct attribute *knl_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_qor.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid4.attr, + &format_attr_filter_link3.attr, + &format_attr_filter_state4.attr, + &format_attr_filter_local.attr, + &format_attr_filter_all_op.attr, + &format_attr_filter_nnm.attr, + &format_attr_filter_opc3.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static const struct attribute_group knl_uncore_cha_format_group = { + .name = "format", + .attrs = knl_uncore_cha_formats_attr, +}; + +static struct event_constraint knl_uncore_cha_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg knl_uncore_cha_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4), + EVENT_EXTRA_END +}; + +static u64 knl_cha_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x4) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP; + return mask; +} + +static struct event_constraint * +knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask); +} + +static int knl_cha_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = knl_uncore_cha_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & knl_cha_filter_mask(idx); + + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_NNC; + reg1->idx = idx; + } + return 0; +} + +static void hswep_cbox_enable_event(struct intel_uncore_box *box, + struct perf_event *event); + +static struct intel_uncore_ops knl_uncore_cha_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = knl_cha_hw_config, + .get_constraint = knl_cha_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type knl_uncore_cha = { + .name = "cha", + .num_counters = 4, + .num_boxes = 38, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = KNL_CHA_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = knl_uncore_cha_constraints, + .ops = &knl_uncore_cha_ops, + .format_group = &knl_uncore_cha_format_group, +}; + +static struct attribute *knl_uncore_pcu_formats_attr[] = { + &format_attr_event2.attr, + &format_attr_use_occ_ctr.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh6.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge_det.attr, + NULL, +}; + +static const struct attribute_group knl_uncore_pcu_format_group = { + .name = "format", + .attrs = knl_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &knl_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *knl_msr_uncores[] = { + &knl_uncore_ubox, + &knl_uncore_cha, + &knl_uncore_pcu, + NULL, +}; + +void knl_uncore_cpu_init(void) +{ + uncore_msr_uncores = knl_msr_uncores; +} + +static void knl_uncore_imc_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, 0); +} + +static void knl_uncore_imc_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK) + == UNCORE_FIXED_EVENT) + pci_write_config_dword(pdev, hwc->config_base, + hwc->config | KNL_PMON_FIXED_CTL_EN); + else + pci_write_config_dword(pdev, hwc->config_base, + hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops knl_uncore_imc_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = knl_uncore_imc_enable_box, + .read_counter = snbep_uncore_pci_read_counter, + .enable_event = knl_uncore_imc_enable_event, + .disable_event = snbep_uncore_pci_disable_event, +}; + +static struct intel_uncore_type knl_uncore_imc_uclk = { + .name = "imc_uclk", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_UCLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW, + .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL, + .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_imc_dclk = { + .name = "imc", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW, + .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL, + .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_edc_uclk = { + .name = "edc_uclk", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_UCLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW, + .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL, + .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_edc_eclk = { + .name = "edc_eclk", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW, + .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL, + .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct event_constraint knl_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type knl_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = knl_uncore_m2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct attribute *knl_uncore_irp_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_qor.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group knl_uncore_irp_format_group = { + .name = "format", + .attrs = knl_uncore_irp_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL, + .ops = &snbep_uncore_pci_ops, + .format_group = &knl_uncore_irp_format_group, +}; + +enum { + KNL_PCI_UNCORE_MC_UCLK, + KNL_PCI_UNCORE_MC_DCLK, + KNL_PCI_UNCORE_EDC_UCLK, + KNL_PCI_UNCORE_EDC_ECLK, + KNL_PCI_UNCORE_M2PCIE, + KNL_PCI_UNCORE_IRP, +}; + +static struct intel_uncore_type *knl_pci_uncores[] = { + [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk, + [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk, + [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk, + [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk, + [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie, + [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp, + NULL, +}; + +/* + * KNL uses a common PCI device ID for multiple instances of an Uncore PMU + * device type. prior to KNL, each instance of a PMU device type had a unique + * device ID. + * + * PCI Device ID Uncore PMU Devices + * ---------------------------------- + * 0x7841 MC0 UClk, MC1 UClk + * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2, + * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2 + * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk, + * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk + * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk, + * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk + * 0x7817 M2PCIe + * 0x7814 IRP +*/ + +static const struct pci_device_id knl_uncore_pci_ids[] = { + { /* MC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), + }, + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), + }, + { /* EDC4 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), + }, + { /* EDC7 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), + }, + { /* M2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver knl_uncore_pci_driver = { + .name = "knl_uncore", + .id_table = knl_uncore_pci_ids, +}; + +int knl_uncore_pci_init(void) +{ + int ret; + + /* All KNL PCI based PMON units are on the same PCI bus except IRP */ + ret = snb_pci2phy_map_init(0x7814); /* IRP */ + if (ret) + return ret; + ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */ + if (ret) + return ret; + uncore_pci_uncores = knl_pci_uncores; + uncore_pci_driver = &knl_uncore_pci_driver; + return 0; +} + +/* end of KNL uncore support */ + +/* Haswell-EP uncore support */ +static struct attribute *hswep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_filter_tid2.attr, + &format_attr_filter_cid.attr, + NULL, +}; + +static const struct attribute_group hswep_uncore_ubox_format_group = { + .name = "format", + .attrs = hswep_uncore_ubox_formats_attr, +}; + +static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + reg1->reg = HSWEP_U_MSR_PMON_FILTER; + reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK; + reg1->idx = 0; + return 0; +} + +static struct intel_uncore_ops hswep_uncore_ubox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_ubox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_ubox_ops, + .format_group = &hswep_uncore_ubox_format_group, +}; + +static struct attribute *hswep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid3.attr, + &format_attr_filter_link2.attr, + &format_attr_filter_state3.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static const struct attribute_group hswep_uncore_cbox_format_group = { + .name = "format", + .attrs = hswep_uncore_cbox_formats_attr, +}; + +static struct event_constraint hswep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x09, 0x1), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + UNCORE_EVENT_CONSTRAINT(0x3e, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg hswep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + EVENT_EXTRA_END +}; + +static u64 hswep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + if (fields & 0x1) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + return mask; +} + +static struct event_constraint * +hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask); +} + +static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void hswep_cbox_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 1, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops hswep_uncore_cbox_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = hswep_cbox_hw_config, + .get_constraint = hswep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 18, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = hswep_uncore_cbox_constraints, + .ops = &hswep_uncore_cbox_ops, + .format_group = &hswep_uncore_cbox_format_group, +}; + +/* + * Write SBOX Initialization register bit by bit to avoid spurious #GPs + */ +static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) { + u64 init = SNBEP_PMON_BOX_CTL_INT; + u64 flags = 0; + int i; + + for_each_set_bit(i, (unsigned long *)&init, 64) { + flags |= (1ULL << i); + wrmsrl(msr, flags); + } + } +} + +static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = { + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .init_box = hswep_uncore_sbox_msr_init_box +}; + +static struct attribute *hswep_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group hswep_uncore_sbox_format_group = { + .name = "format", + .attrs = hswep_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_type hswep_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 44, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &hswep_uncore_sbox_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops hswep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *hswep_msr_uncores[] = { + &hswep_uncore_ubox, + &hswep_uncore_cbox, + &hswep_uncore_sbox, + &hswep_uncore_pcu, + NULL, +}; + +#define HSWEP_PCU_DID 0x2fc0 +#define HSWEP_PCU_CAPID4_OFFET 0x94 +#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3) + +static bool hswep_has_limit_sbox(unsigned int device) +{ + struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL); + u32 capid4; + + if (!dev) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + pci_dev_put(dev); + if (!hswep_get_chop(capid4)) + return true; + + return false; +} + +void hswep_uncore_cpu_init(void) +{ + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + + /* Detect 6-8 core systems with only two SBOXes */ + if (hswep_has_limit_sbox(HSWEP_PCU_DID)) + hswep_uncore_sbox.num_boxes = 2; + + uncore_msr_uncores = hswep_msr_uncores; +} + +static struct intel_uncore_type hswep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct uncore_event_desc hswep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type hswep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8}; + +static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops hswep_uncore_irp_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = hswep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type hswep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &hswep_uncore_irp_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type hswep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .format_group = &snbep_uncore_qpi_format_group, +}; + +static struct event_constraint hswep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x1), + UNCORE_EVENT_CONSTRAINT(0x24, 0x1), + UNCORE_EVENT_CONSTRAINT(0x25, 0x1), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x27, 0x1), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x1), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = hswep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct event_constraint hswep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x7), + UNCORE_EVENT_CONSTRAINT(0x08, 0x7), + UNCORE_EVENT_CONSTRAINT(0x09, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0a, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x15, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 44, + .constraints = hswep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + HSWEP_PCI_UNCORE_HA, + HSWEP_PCI_UNCORE_IMC, + HSWEP_PCI_UNCORE_IRP, + HSWEP_PCI_UNCORE_QPI, + HSWEP_PCI_UNCORE_R2PCIE, + HSWEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *hswep_pci_uncores[] = { + [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha, + [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc, + [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp, + [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi, + [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie, + [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id hswep_uncore_pci_ids[] = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver hswep_uncore_pci_driver = { + .name = "hswep_uncore", + .id_table = hswep_uncore_pci_ids, +}; + +int hswep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x2f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); + if (ret) + return ret; + uncore_pci_uncores = hswep_pci_uncores; + uncore_pci_driver = &hswep_uncore_pci_driver; + return 0; +} +/* end of Haswell-EP uncore support */ + +/* BDX uncore support */ + +static struct intel_uncore_type bdx_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct event_constraint bdx_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x09, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x3e, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type bdx_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 24, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = bdx_uncore_cbox_constraints, + .ops = &hswep_uncore_cbox_ops, + .format_group = &hswep_uncore_cbox_format_group, +}; + +static struct intel_uncore_type bdx_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &hswep_uncore_sbox_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +#define BDX_MSR_UNCORE_SBOX 3 + +static struct intel_uncore_type *bdx_msr_uncores[] = { + &bdx_uncore_ubox, + &bdx_uncore_cbox, + &hswep_uncore_pcu, + &bdx_uncore_sbox, + NULL, +}; + +/* Bit 7 'Use Occupancy' is not available for counter 0 on BDX */ +static struct event_constraint bdx_uncore_pcu_constraints[] = { + EVENT_CONSTRAINT(0x80, 0xe, 0x80), + EVENT_CONSTRAINT_END +}; + +#define BDX_PCU_DID 0x6fc0 + +void bdx_uncore_cpu_init(void) +{ + if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = bdx_msr_uncores; + + /* Detect systems with no SBOXes */ + if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + + hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; +} + +static struct intel_uncore_type bdx_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type bdx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type bdx_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &hswep_uncore_irp_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type bdx_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .format_group = &snbep_uncore_qpi_format_group, +}; + +static struct event_constraint bdx_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x1), + UNCORE_EVENT_CONSTRAINT(0x25, 0x1), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type bdx_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = bdx_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct event_constraint bdx_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x7), + UNCORE_EVENT_CONSTRAINT(0x07, 0x7), + UNCORE_EVENT_CONSTRAINT(0x08, 0x7), + UNCORE_EVENT_CONSTRAINT(0x09, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0a, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x15, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type bdx_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 48, + .constraints = bdx_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + BDX_PCI_UNCORE_HA, + BDX_PCI_UNCORE_IMC, + BDX_PCI_UNCORE_IRP, + BDX_PCI_UNCORE_QPI, + BDX_PCI_UNCORE_R2PCIE, + BDX_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *bdx_pci_uncores[] = { + [BDX_PCI_UNCORE_HA] = &bdx_uncore_ha, + [BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc, + [BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp, + [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi, + [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie, + [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id bdx_uncore_pci_ids[] = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* QPI Port 2 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + BDX_PCI_QPI_PORT2_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver bdx_uncore_pci_driver = { + .name = "bdx_uncore", + .id_table = bdx_uncore_pci_ids, +}; + +int bdx_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x6f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); + + if (ret) + return ret; + uncore_pci_uncores = bdx_pci_uncores; + uncore_pci_driver = &bdx_uncore_pci_driver; + return 0; +} + +/* end of BDX uncore support */ + +/* SKX uncore support */ + +static struct intel_uncore_type skx_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct attribute *skx_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid4.attr, + &format_attr_filter_state5.attr, + &format_attr_filter_rem.attr, + &format_attr_filter_loc.attr, + &format_attr_filter_nm.attr, + &format_attr_filter_all_op.attr, + &format_attr_filter_not_nm.attr, + &format_attr_filter_opc_0.attr, + &format_attr_filter_opc_1.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static const struct attribute_group skx_uncore_chabox_format_group = { + .name = "format", + .attrs = skx_uncore_cha_formats_attr, +}; + +static struct event_constraint skx_uncore_chabox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg skx_uncore_cha_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x3134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3), + EVENT_EXTRA_END +}; + +static u64 skx_cha_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) { + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_REM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LOC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC0; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC1; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ISOC; + } + return mask; +} + +static struct event_constraint * +skx_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, skx_cha_filter_mask); +} + +static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; + + for (er = skx_uncore_cha_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & skx_cha_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops skx_uncore_chabox_ops = { + /* There is no frz_en for chabox ctl */ + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = skx_cha_hw_config, + .get_constraint = skx_cha_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &skx_uncore_chabox_ops, + .format_group = &skx_uncore_chabox_format_group, +}; + +static struct attribute *skx_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask.attr, + &format_attr_fc_mask.attr, + NULL, +}; + +static const struct attribute_group skx_uncore_iio_format_group = { + .name = "format", + .attrs = skx_uncore_iio_formats_attr, +}; + +static struct event_constraint skx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), + UNCORE_EVENT_CONSTRAINT(0x95, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + +static void skx_iio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops skx_uncore_iio_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = skx_iio_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die) +{ + return pmu->type->topology[die].configuration >> + (pmu->pmu_idx * BUS_NUM_STRIDE); +} + +static umode_t +pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, + int die, int zero_bus_pmu) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj)); + + return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode; +} + +static umode_t +skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 0. */ + return pmu_iio_mapping_visible(kobj, attr, die, 0); +} + +static ssize_t skx_iio_mapping_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + struct dev_ext_attribute *ea = to_dev_ext_attribute(attr); + long die = (long)ea->var; + + return sprintf(buf, "%04x:%02x\n", pmu->type->topology[die].segment, + skx_iio_stack(pmu, die)); +} + +static int skx_msr_cpu_bus_read(int cpu, u64 *topology) +{ + u64 msr_value; + + if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || + !(msr_value & SKX_MSR_CPU_BUS_VALID_BIT)) + return -ENXIO; + + *topology = msr_value; + + return 0; +} + +static int die_to_cpu(int die) +{ + int res = 0, cpu, current_die; + /* + * Using cpus_read_lock() to ensure cpu is not going down between + * looking at cpu_online_mask. + */ + cpus_read_lock(); + for_each_online_cpu(cpu) { + current_die = topology_logical_die_id(cpu); + if (current_die == die) { + res = cpu; + break; + } + } + cpus_read_unlock(); + return res; +} + +static int skx_iio_get_topology(struct intel_uncore_type *type) +{ + int die, ret = -EPERM; + + type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), + GFP_KERNEL); + if (!type->topology) + return -ENOMEM; + + for (die = 0; die < uncore_max_dies(); die++) { + ret = skx_msr_cpu_bus_read(die_to_cpu(die), + &type->topology[die].configuration); + if (ret) + break; + + ret = uncore_die_to_segment(die); + if (ret < 0) + break; + + type->topology[die].segment = ret; + } + + if (ret < 0) { + kfree(type->topology); + type->topology = NULL; + } + + return ret; +} + +static struct attribute_group skx_iio_mapping_group = { + .is_visible = skx_iio_mapping_visible, +}; + +static const struct attribute_group *skx_iio_attr_update[] = { + &skx_iio_mapping_group, + NULL, +}; + +static void pmu_clear_mapping_attr(const struct attribute_group **groups, + struct attribute_group *ag) +{ + int i; + + for (i = 0; groups[i]; i++) { + if (groups[i] == ag) { + for (i++; groups[i]; i++) + groups[i - 1] = groups[i]; + groups[i - 1] = NULL; + break; + } + } +} + +static int +pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + char buf[64]; + int ret; + long die = -1; + struct attribute **attrs = NULL; + struct dev_ext_attribute *eas = NULL; + + ret = type->get_topology(type); + if (ret < 0) + goto clear_attr_update; + + ret = -ENOMEM; + + /* One more for NULL. */ + attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL); + if (!attrs) + goto clear_topology; + + eas = kcalloc(uncore_max_dies(), sizeof(*eas), GFP_KERNEL); + if (!eas) + goto clear_attrs; + + for (die = 0; die < uncore_max_dies(); die++) { + sprintf(buf, "die%ld", die); + sysfs_attr_init(&eas[die].attr.attr); + eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL); + if (!eas[die].attr.attr.name) + goto err; + eas[die].attr.attr.mode = 0444; + eas[die].attr.show = skx_iio_mapping_show; + eas[die].attr.store = NULL; + eas[die].var = (void *)die; + attrs[die] = &eas[die].attr.attr; + } + ag->attrs = attrs; + + return 0; +err: + for (; die >= 0; die--) + kfree(eas[die].attr.attr.name); + kfree(eas); +clear_attrs: + kfree(attrs); +clear_topology: + kfree(type->topology); +clear_attr_update: + pmu_clear_mapping_attr(type->attr_update, ag); + return ret; +} + +static void +pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + struct attribute **attr = ag->attrs; + + if (!attr) + return; + + for (; *attr; attr++) + kfree((*attr)->name); + kfree(attr_to_ext_attr(*ag->attrs)); + kfree(ag->attrs); + ag->attrs = NULL; + kfree(type->topology); +} + +static int skx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &skx_iio_mapping_group); +} + +static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group); +} + +static struct intel_uncore_type skx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SKX_IIO0_MSR_PMON_CTL0, + .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, + .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SKX_IIO0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IIO_MSR_OFFSET, + .constraints = skx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_iio_format_group, + .attr_update = skx_iio_attr_update, + .get_topology = skx_iio_get_topology, + .set_mapping = skx_iio_set_mapping, + .cleanup_mapping = skx_iio_cleanup_mapping, +}; + +enum perf_uncore_iio_freerunning_type_id { + SKX_IIO_MSR_IOCLK = 0, + SKX_IIO_MSR_BW = 1, + SKX_IIO_MSR_UTIL = 2, + + SKX_IIO_FREERUNNING_TYPE_MAX, +}; + + +static struct freerunning_counters skx_iio_freerunning[] = { + [SKX_IIO_MSR_IOCLK] = { 0xa45, 0x1, 0x20, 1, 36 }, + [SKX_IIO_MSR_BW] = { 0xb00, 0x1, 0x10, 8, 36 }, + [SKX_IIO_MSR_UTIL] = { 0xb08, 0x1, 0x10, 8, 36 }, +}; + +static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = { + /* Free-Running IO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + /* Free-running IIO UTILIZATION Counters */ + INTEL_UNCORE_EVENT_DESC(util_in_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(util_out_port0, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(util_in_port1, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(util_out_port1, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(util_in_port2, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(util_out_port2, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(util_in_port3, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(util_out_port3, "event=0xff,umask=0x37"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { + .read_counter = uncore_msr_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL, +}; + +static const struct attribute_group skx_uncore_iio_freerunning_format_group = { + .name = "format", + .attrs = skx_uncore_iio_freerunning_formats_attr, +}; + +static struct intel_uncore_type skx_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_boxes = 6, + .num_freerunning_types = SKX_IIO_FREERUNNING_TYPE_MAX, + .freerunning = skx_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = skx_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct attribute *skx_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group skx_uncore_format_group = { + .name = "format", + .attrs = skx_uncore_formats_attr, +}; + +static struct intel_uncore_type skx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SKX_IRP0_MSR_PMON_CTL0, + .perf_ctr = SKX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IRP_MSR_OFFSET, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct attribute *skx_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge_det.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute_group skx_uncore_pcu_format_group = { + .name = "format", + .attrs = skx_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_ops skx_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &skx_uncore_pcu_ops, + .format_group = &skx_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *skx_msr_uncores[] = { + &skx_uncore_ubox, + &skx_uncore_chabox, + &skx_uncore_iio, + &skx_uncore_iio_free_running, + &skx_uncore_irp, + &skx_uncore_pcu, + NULL, +}; + +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. + */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + +static int skx_count_chabox(void) +{ + struct pci_dev *dev = NULL; + u32 val = 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; + + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); +} + +void skx_uncore_cpu_init(void) +{ + skx_uncore_chabox.num_boxes = skx_count_chabox(); + uncore_msr_uncores = skx_msr_uncores; +} + +static struct intel_uncore_type skx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct attribute *skx_upi_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group skx_upi_uncore_format_group = { + .name = "format", + .attrs = skx_upi_uncore_formats_attr, +}; + +static void skx_upi_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_UPI_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_upi_uncore_pci_ops = { + .init_box = skx_upi_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SKX_UPI_PCI_PMON_CTR0, + .event_ctl = SKX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_UPI_CTL_UMASK_EXT, + .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &skx_upi_uncore_format_group, +}; + +static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_M2M_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_m2m_uncore_pci_ops = { + .init_box = skx_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SKX_M2M_PCI_PMON_CTR0, + .event_ctl = SKX_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_M2M_PCI_PMON_BOX_CTL, + .ops = &skx_m2m_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m2pcie_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + UNCORE_EVENT_CONSTRAINT(0x51, 0x7), + UNCORE_EVENT_CONSTRAINT(0x52, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m3upi_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + SKX_PCI_UNCORE_IMC, + SKX_PCI_UNCORE_M2M, + SKX_PCI_UNCORE_UPI, + SKX_PCI_UNCORE_M2PCIE, + SKX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *skx_pci_uncores[] = { + [SKX_PCI_UNCORE_IMC] = &skx_uncore_imc, + [SKX_PCI_UNCORE_M2M] = &skx_uncore_m2m, + [SKX_PCI_UNCORE_UPI] = &skx_uncore_upi, + [SKX_PCI_UNCORE_M2PCIE] = &skx_uncore_m2pcie, + [SKX_PCI_UNCORE_M3UPI] = &skx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id skx_uncore_pci_ids[] = { + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 2, SKX_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 6, SKX_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 2, SKX_PCI_UNCORE_IMC, 2), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 2, SKX_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 6, SKX_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 2, SKX_PCI_UNCORE_IMC, 5), + }, + { /* M2M0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 0, SKX_PCI_UNCORE_M2M, 0), + }, + { /* M2M1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 0, SKX_PCI_UNCORE_M2M, 1), + }, + { /* UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, SKX_PCI_UNCORE_UPI, 0), + }, + { /* UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, SKX_PCI_UNCORE_UPI, 1), + }, + { /* UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, SKX_PCI_UNCORE_UPI, 2), + }, + { /* M2PCIe 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 1, SKX_PCI_UNCORE_M2PCIE, 0), + }, + { /* M2PCIe 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 1, SKX_PCI_UNCORE_M2PCIE, 1), + }, + { /* M2PCIe 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(23, 1, SKX_PCI_UNCORE_M2PCIE, 2), + }, + { /* M2PCIe 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), + }, + { /* M3UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + + +static struct pci_driver skx_uncore_pci_driver = { + .name = "skx_uncore", + .id_table = skx_uncore_pci_ids, +}; + +int skx_uncore_pci_init(void) +{ + /* need to double check pci address */ + int ret = snbep_pci2phy_map_init(0x2014, SKX_CPUNODEID, SKX_GIDNIDMAP, false); + + if (ret) + return ret; + + uncore_pci_uncores = skx_pci_uncores; + uncore_pci_driver = &skx_uncore_pci_driver; + return 0; +} + +/* end of SKX uncore support */ + +/* SNR uncore support */ + +static struct intel_uncore_type snr_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = SNR_U_MSR_PMON_CTR0, + .event_ctl = SNR_U_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct attribute *snr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext2.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group snr_uncore_chabox_format_group = { + .name = "format", + .attrs = snr_uncore_cha_formats_attr, +}; + +static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 + + box->pmu->type->msr_offset * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + + return 0; +} + +static void snr_cha_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snr_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = snr_cha_hw_config, +}; + +static struct intel_uncore_type snr_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SNR_CHA_MSR_PMON_CTL0, + .perf_ctr = SNR_CHA_MSR_PMON_CTR0, + .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .ops = &snr_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static struct attribute *snr_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask2.attr, + &format_attr_fc_mask2.attr, + NULL, +}; + +static const struct attribute_group snr_uncore_iio_format_group = { + .name = "format", + .attrs = snr_uncore_iio_formats_attr, +}; + +static umode_t +snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 1. */ + return pmu_iio_mapping_visible(kobj, attr, die, 1); +} + +static struct attribute_group snr_iio_mapping_group = { + .is_visible = snr_iio_mapping_visible, +}; + +static const struct attribute_group *snr_iio_attr_update[] = { + &snr_iio_mapping_group, + NULL, +}; + +static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping) +{ + u32 sad_cfg; + int die, stack_id, ret = -EPERM; + struct pci_dev *dev = NULL; + + type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), + GFP_KERNEL); + if (!type->topology) + return -ENOMEM; + + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) { + ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg); + if (ret) { + ret = pcibios_err_to_errno(ret); + break; + } + + die = uncore_pcibus_to_dieid(dev->bus); + stack_id = SAD_CONTROL_STACK_ID(sad_cfg); + if (die < 0 || stack_id >= type->num_boxes) { + ret = -EPERM; + break; + } + + /* Convert stack id from SAD_CONTROL to PMON notation. */ + stack_id = sad_pmon_mapping[stack_id]; + + ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number; + type->topology[die].segment = pci_domain_nr(dev->bus); + } + + if (ret) { + kfree(type->topology); + type->topology = NULL; + } + + pci_dev_put(dev); + + return ret; +} + +/* + * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + SNR_QAT_PMON_ID, + SNR_CBDMA_DMI_PMON_ID, + SNR_NIS_PMON_ID, + SNR_DLB_PMON_ID, + SNR_PCIE_GEN3_PMON_ID +}; + +static u8 snr_sad_pmon_mapping[] = { + SNR_CBDMA_DMI_PMON_ID, + SNR_PCIE_GEN3_PMON_ID, + SNR_DLB_PMON_ID, + SNR_NIS_PMON_ID, + SNR_QAT_PMON_ID +}; + +static int snr_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, snr_sad_pmon_mapping); +} + +static int snr_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &snr_iio_mapping_group); +} + +static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); +} + +static struct event_constraint snr_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snr_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IIO_MSR_PMON_CTL0, + .perf_ctr = SNR_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IIO_MSR_OFFSET, + .constraints = snr_uncore_iio_constraints, + .ops = &ivbep_uncore_msr_ops, + .format_group = &snr_uncore_iio_format_group, + .attr_update = snr_iio_attr_update, + .get_topology = snr_iio_get_topology, + .set_mapping = snr_iio_set_mapping, + .cleanup_mapping = snr_iio_cleanup_mapping, +}; + +static struct intel_uncore_type snr_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IRP0_MSR_PMON_CTL0, + .perf_ctr = SNR_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IRP_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0, + .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offset = SNR_M2PCIE_MSR_OFFSET, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops snr_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snr_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snr_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCU_MSR_PMON_CTR0, + .event_ctl = SNR_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snr_uncore_pcu_ops, + .format_group = &skx_uncore_pcu_format_group, +}; + +enum perf_uncore_snr_iio_freerunning_type_id { + SNR_IIO_MSR_IOCLK, + SNR_IIO_MSR_BW_IN, + + SNR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_iio_freerunning[] = { + [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 }, + [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 5, + .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = snr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = snr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_msr_uncores[] = { + &snr_uncore_ubox, + &snr_uncore_chabox, + &snr_uncore_iio, + &snr_uncore_irp, + &snr_uncore_m2pcie, + &snr_uncore_pcu, + &snr_uncore_iio_free_running, + NULL, +}; + +void snr_uncore_cpu_init(void) +{ + uncore_msr_uncores = snr_msr_uncores; +} + +static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops snr_m2m_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct attribute *snr_m2m_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext3.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group snr_m2m_uncore_format_group = { + .name = "format", + .attrs = snr_m2m_uncore_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &snr_m2m_uncore_format_group, +}; + +static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN)); + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); +} + +static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snr_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type snr_uncore_pcie3 = { + .name = "pcie3", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, + .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, + .ops = &snr_pcie3_uncore_pci_ops, + .format_group = &skx_uncore_iio_format_group, +}; + +enum { + SNR_PCI_UNCORE_M2M, + SNR_PCI_UNCORE_PCIE3, +}; + +static struct intel_uncore_type *snr_pci_uncores[] = { + [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, + [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, + NULL, +}; + +static const struct pci_device_id snr_uncore_pci_ids[] = { + { /* M2M */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_driver = { + .name = "snr_uncore", + .id_table = snr_uncore_pci_ids, +}; + +static const struct pci_device_id snr_uncore_pci_sub_ids[] = { + { /* PCIe3 RP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_sub_driver = { + .name = "snr_uncore_sub", + .id_table = snr_uncore_pci_sub_ids, +}; + +int snr_uncore_pci_init(void) +{ + /* SNR UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = snr_pci_uncores; + uncore_pci_driver = &snr_uncore_pci_driver; + uncore_pci_sub_driver = &snr_uncore_pci_sub_driver; + return 0; +} + +#define SNR_MC_DEVICE_ID 0x3451 + +static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id) +{ + struct pci_dev *mc_dev = NULL; + int pkg; + + while (1) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev); + if (!mc_dev) + break; + pkg = uncore_pcibus_to_dieid(mc_dev->bus); + if (pkg == id) + break; + } + return mc_dev; +} + +static int snr_uncore_mmio_map(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) +{ + struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid); + struct intel_uncore_type *type = box->pmu->type; + resource_size_t addr; + u32 pci_dword; + + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + + pci_read_config_dword(pdev, mem_offset, &pci_dword); + addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; + + addr += box_ctl; + + pci_dev_put(pdev); + + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) { + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + return -EINVAL; + } + + return 0; +} + +static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) +{ + if (!snr_uncore_mmio_map(box, box_ctl, mem_offset, device)) + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +} + +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), + SNR_IMC_MMIO_MEM0_OFFSET, + SNR_MC_DEVICE_ID); +} + +static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config |= SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (!uncore_mmio_is_valid_offset(box, hwc->config_base)) + return; + + writel(hwc->config | SNBEP_PMON_CTL_EN, + box->io_addr + hwc->config_base); +} + +static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (!uncore_mmio_is_valid_offset(box, hwc->config_base)) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops snr_uncore_mmio_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct uncore_event_desc snr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .ops = &snr_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_snr_imc_freerunning_type_id { + SNR_IMC_DCLK, + SNR_IMC_DDR, + + SNR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_imc_freerunning[] = { + [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type snr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 1, + .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .freerunning = snr_imc_freerunning, + .ops = &snr_uncore_imc_freerunning_ops, + .event_descs = snr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_mmio_uncores[] = { + &snr_uncore_imc, + &snr_uncore_imc_free_running, + NULL, +}; + +void snr_uncore_mmio_init(void) +{ + uncore_mmio_uncores = snr_mmio_uncores; +} + +/* end of SNR uncore support */ + +/* ICX uncore support */ + +static unsigned icx_cha_msr_offsets[] = { + 0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310, + 0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e, + 0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a, + 0x428, 0x436, 0x444, 0x452, 0x460, 0x46e, 0x47c, 0x0, 0xe, + 0x1c, 0x2a, 0x38, 0x46, +}; + +static int icx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN); + + if (tie_en) { + reg1->reg = ICX_C34_MSR_PMON_BOX_FILTER0 + + icx_cha_msr_offsets[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops icx_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = icx_cha_hw_config, +}; + +static struct intel_uncore_type icx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = ICX_C34_MSR_PMON_CTL0, + .perf_ctr = ICX_C34_MSR_PMON_CTR0, + .box_ctl = ICX_C34_MSR_PMON_BOX_CTL, + .msr_offsets = icx_cha_msr_offsets, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .constraints = skx_uncore_chabox_constraints, + .ops = &icx_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static unsigned icx_msr_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static struct event_constraint icx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x03, 0x3), + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + +static umode_t +icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 5. */ + return pmu_iio_mapping_visible(kobj, attr, die, 5); +} + +static struct attribute_group icx_iio_mapping_group = { + .is_visible = icx_iio_mapping_visible, +}; + +static const struct attribute_group *icx_iio_attr_update[] = { + &icx_iio_mapping_group, + NULL, +}; + +/* + * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, + ICX_CBDMA_DMI_PMON_ID +}; + +static u8 icx_sad_pmon_mapping[] = { + ICX_CBDMA_DMI_PMON_ID, + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, +}; + +static int icx_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, icx_sad_pmon_mapping); +} + +static int icx_iio_set_mapping(struct intel_uncore_type *type) +{ + /* Detect ICX-D system. This case is not supported */ + if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { + pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); + return -EPERM; + } + return pmu_iio_set_mapping(type, &icx_iio_mapping_group); +} + +static void icx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group); +} + +static struct intel_uncore_type icx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IIO_MSR_PMON_CTL0, + .perf_ctr = ICX_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = ICX_IIO_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &snr_uncore_iio_format_group, + .attr_update = icx_iio_attr_update, + .get_topology = icx_iio_get_topology, + .set_mapping = icx_iio_set_mapping, + .cleanup_mapping = icx_iio_cleanup_mapping, +}; + +static struct intel_uncore_type icx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IRP0_MSR_PMON_CTL0, + .perf_ctr = ICX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_IRP0_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = ICX_M2PCIE_MSR_PMON_CTR0, + .box_ctl = ICX_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_m2pcie_constraints, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum perf_uncore_icx_iio_freerunning_type_id { + ICX_IIO_MSR_IOCLK, + ICX_IIO_MSR_BW_IN, + + ICX_IIO_FREERUNNING_TYPE_MAX, +}; + +static unsigned icx_iio_clk_freerunning_box_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static unsigned icx_iio_bw_freerunning_box_offsets[] = { + 0x0, 0x10, 0x20, 0x90, 0xa0, 0xb0, +}; + +static struct freerunning_counters icx_iio_freerunning[] = { + [ICX_IIO_MSR_IOCLK] = { 0xa55, 0x1, 0x20, 1, 48, icx_iio_clk_freerunning_box_offsets }, + [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, +}; + +static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type icx_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 6, + .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, + .freerunning = icx_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = icx_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_msr_uncores[] = { + &skx_uncore_ubox, + &icx_uncore_chabox, + &icx_uncore_iio, + &icx_uncore_irp, + &icx_uncore_m2pcie, + &skx_uncore_pcu, + &icx_uncore_iio_free_running, + NULL, +}; + +/* + * To determine the number of CHAs, it should read CAPID6(Low) and CAPID7 (High) + * registers which located at Device 30, Function 3 + */ +#define ICX_CAPID6 0x9c +#define ICX_CAPID7 0xa0 + +static u64 icx_count_chabox(void) +{ + struct pci_dev *dev = NULL; + u64 caps = 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x345b, dev); + if (!dev) + goto out; + + pci_read_config_dword(dev, ICX_CAPID6, (u32 *)&caps); + pci_read_config_dword(dev, ICX_CAPID7, (u32 *)&caps + 1); +out: + pci_dev_put(dev); + return hweight64(caps); +} + +void icx_uncore_cpu_init(void) +{ + u64 num_boxes = icx_count_chabox(); + + if (WARN_ON(num_boxes > ARRAY_SIZE(icx_cha_msr_offsets))) + return; + icx_uncore_chabox.num_boxes = num_boxes; + uncore_msr_uncores = icx_msr_uncores; +} + +static struct intel_uncore_type icx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &snr_m2m_uncore_format_group, +}; + +static struct attribute *icx_upi_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group icx_upi_uncore_format_group = { + .name = "format", + .attrs = icx_upi_uncore_formats_attr, +}; + +static struct intel_uncore_type icx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0, + .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = ICX_UPI_CTL_UMASK_EXT, + .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &icx_upi_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1c, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, + .constraints = icx_uncore_m3upi_constraints, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + ICX_PCI_UNCORE_M2M, + ICX_PCI_UNCORE_UPI, + ICX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *icx_pci_uncores[] = { + [ICX_PCI_UNCORE_M2M] = &icx_uncore_m2m, + [ICX_PCI_UNCORE_UPI] = &icx_uncore_upi, + [ICX_PCI_UNCORE_M3UPI] = &icx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id icx_uncore_pci_ids[] = { + { /* M2M 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, ICX_PCI_UNCORE_M2M, 0), + }, + { /* M2M 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 0, ICX_PCI_UNCORE_M2M, 1), + }, + { /* M2M 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, ICX_PCI_UNCORE_M2M, 2), + }, + { /* M2M 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, ICX_PCI_UNCORE_M2M, 3), + }, + { /* UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(2, 1, ICX_PCI_UNCORE_UPI, 0), + }, + { /* UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(3, 1, ICX_PCI_UNCORE_UPI, 1), + }, + { /* UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 1, ICX_PCI_UNCORE_UPI, 2), + }, + { /* M3UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(5, 1, ICX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(6, 1, ICX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(7, 1, ICX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver icx_uncore_pci_driver = { + .name = "icx_uncore", + .id_table = icx_uncore_pci_ids, +}; + +int icx_uncore_pci_init(void) +{ + /* ICX UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3450, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = icx_pci_uncores; + uncore_pci_driver = &icx_uncore_pci_driver; + return 0; +} + +static void icx_uncore_imc_init_box(struct intel_uncore_box *box) +{ + unsigned int box_ctl = box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * (box->pmu->pmu_idx % ICX_NUMBER_IMC_CHN); + int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + __snr_uncore_mmio_init_box(box, box_ctl, mem_offset, + SNR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops icx_uncore_mmio_ops = { + .init_box = icx_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type icx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 12, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .ops = &icx_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_icx_imc_freerunning_type_id { + ICX_IMC_DCLK, + ICX_IMC_DDR, + ICX_IMC_DDRT, + + ICX_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters icx_imc_freerunning[] = { + [ICX_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [ICX_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, + [ICX_IMC_DDRT] = { 0x22a0, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SNR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = { + .init_box = icx_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type icx_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 5, + .num_boxes = 4, + .num_freerunning_types = ICX_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .freerunning = icx_imc_freerunning, + .ops = &icx_uncore_imc_freerunning_ops, + .event_descs = icx_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_mmio_uncores[] = { + &icx_uncore_imc, + &icx_uncore_imc_free_running, + NULL, +}; + +void icx_uncore_mmio_init(void) +{ + uncore_mmio_uncores = icx_mmio_uncores; +} + +/* end of ICX uncore support */ + +/* SPR uncore support */ + +static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config); +} + +static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, 0); + + wrmsrl(hwc->config_base, 0); +} + +static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); + struct intel_uncore_type *type = box->pmu->type; + + if (tie_en) { + reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops spr_uncore_chabox_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = spr_uncore_msr_disable_event, + .enable_event = spr_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = spr_cha_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group spr_uncore_chabox_format_group = { + .name = "format", + .attrs = spr_uncore_cha_formats_attr, +}; + +static ssize_t alias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + char pmu_name[UNCORE_PMU_NAME_LEN]; + + uncore_get_alias_name(pmu_name, pmu); + return sysfs_emit(buf, "%s\n", pmu_name); +} + +static DEVICE_ATTR_RO(alias); + +static struct attribute *uncore_alias_attrs[] = { + &dev_attr_alias.attr, + NULL +}; + +ATTRIBUTE_GROUPS(uncore_alias); + +static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &spr_uncore_chabox_ops, + .format_group = &spr_uncore_chabox_format_group, + .attr_update = uncore_alias_groups, +}; + +static struct intel_uncore_type spr_uncore_iio = { + .name = "iio", + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .format_group = &snr_uncore_iio_format_group, + .attr_update = uncore_alias_groups, + .constraints = icx_uncore_iio_constraints, +}; + +static struct attribute *spr_uncore_raw_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group spr_uncore_raw_format_group = { + .name = "format", + .attrs = spr_uncore_raw_formats_attr, +}; + +#define SPR_UNCORE_COMMON_FORMAT() \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \ + .format_group = &spr_uncore_raw_format_group, \ + .attr_update = uncore_alias_groups + +static struct intel_uncore_type spr_uncore_irp = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "irp", + +}; + +static struct event_constraint spr_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type spr_uncore_m2pcie = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "m2pcie", + .constraints = spr_uncore_m2pcie_constraints, +}; + +static struct intel_uncore_type spr_uncore_pcu = { + .name = "pcu", + .attr_update = uncore_alias_groups, +}; + +static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (uncore_pmc_fixed(hwc->idx)) + writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base); + else + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops spr_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type spr_uncore_imc = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .ops = &spr_uncore_mmio_ops, +}; + +static void spr_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); + pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config); +} + +static struct intel_uncore_ops spr_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = spr_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define SPR_UNCORE_PCI_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_pci_ops + +static struct intel_uncore_type spr_uncore_m2m = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m2m", +}; + +static struct intel_uncore_type spr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", + .constraints = icx_uncore_m3upi_constraints, +}; + +static struct intel_uncore_type spr_uncore_mdf = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "mdf", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_CHA 0 +#define UNCORE_SPR_IIO 1 +#define UNCORE_SPR_IMC 6 + +static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { + &spr_uncore_chabox, + &spr_uncore_iio, + &spr_uncore_irp, + &spr_uncore_m2pcie, + &spr_uncore_pcu, + NULL, + &spr_uncore_imc, + &spr_uncore_m2m, + &spr_uncore_upi, + &spr_uncore_m3upi, + NULL, + &spr_uncore_mdf, +}; + +enum perf_uncore_spr_iio_freerunning_type_id { + SPR_IIO_MSR_IOCLK, + SPR_IIO_MSR_BW_IN, + SPR_IIO_MSR_BW_OUT, + + SPR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_iio_freerunning[] = { + [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 }, + [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 }, + [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + /* Free-Running IIO BANDWIDTH OUT Counters */ + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type spr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = spr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = spr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +enum perf_uncore_spr_imc_freerunning_type_id { + SPR_IMC_DCLK, + SPR_IMC_PQ_CYCLES, + + SPR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_imc_freerunning[] = { + [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"), + { /* end: all zeroes */ }, +}; + +#define SPR_MC_DEVICE_ID 0x3251 + +static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SPR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = { + .init_box = spr_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type spr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = spr_imc_freerunning, + .ops = &spr_uncore_imc_freerunning_ops, + .event_descs = spr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +#define UNCORE_SPR_MSR_EXTRA_UNCORES 1 +#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 + +static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { + &spr_uncore_iio_free_running, +}; + +static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = { + &spr_uncore_imc_free_running, +}; + +static void uncore_type_customized_copy(struct intel_uncore_type *to_type, + struct intel_uncore_type *from_type) +{ + if (!to_type || !from_type) + return; + + if (from_type->name) + to_type->name = from_type->name; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->event_mask) + to_type->event_mask = from_type->event_mask; + if (from_type->event_mask_ext) + to_type->event_mask_ext = from_type->event_mask_ext; + if (from_type->fixed_ctr) + to_type->fixed_ctr = from_type->fixed_ctr; + if (from_type->fixed_ctl) + to_type->fixed_ctl = from_type->fixed_ctl; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->num_shared_regs) + to_type->num_shared_regs = from_type->num_shared_regs; + if (from_type->constraints) + to_type->constraints = from_type->constraints; + if (from_type->ops) + to_type->ops = from_type->ops; + if (from_type->event_descs) + to_type->event_descs = from_type->event_descs; + if (from_type->format_group) + to_type->format_group = from_type->format_group; + if (from_type->attr_update) + to_type->attr_update = from_type->attr_update; +} + +static struct intel_uncore_type ** +uncore_get_uncores(enum uncore_access_type type_id, int num_extra, + struct intel_uncore_type **extra) +{ + struct intel_uncore_type **types, **start_types; + int i; + + start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra); + + /* Only copy the customized features */ + for (; *types; types++) { + if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES) + continue; + uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]); + } + + for (i = 0; i < num_extra; i++, types++) + *types = extra[i]; + + return start_types; +} + +static struct intel_uncore_type * +uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) +{ + for (; *types; types++) { + if (type_id == (*types)->type_id) + return *types; + } + + return NULL; +} + +static int uncore_type_max_boxes(struct intel_uncore_type **types, + int type_id) +{ + struct intel_uncore_type *type; + int i, max = 0; + + type = uncore_find_type_by_id(types, type_id); + if (!type) + return 0; + + for (i = 0; i < type->num_boxes; i++) { + if (type->box_ids[i] > max) + max = type->box_ids[i]; + } + + return max + 1; +} + +#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE + +void spr_uncore_cpu_init(void) +{ + struct intel_uncore_type *type; + u64 num_cbo; + + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, + UNCORE_SPR_MSR_EXTRA_UNCORES, + spr_msr_uncores); + + type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA); + if (type) { + /* + * The value from the discovery table (stored in the type->num_boxes + * of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a + * firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it. + */ + rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo); + /* + * The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact + * the EMR XCC. Don't let the value from the MSR replace the existing value. + */ + if (num_cbo) + type->num_boxes = num_cbo; + } + spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); +} + +int spr_uncore_pci_init(void) +{ + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + return 0; +} + +void spr_uncore_mmio_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); + + if (ret) + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL); + else { + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, + UNCORE_SPR_MMIO_EXTRA_UNCORES, + spr_mmio_uncores); + + spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2; + } +} + +/* end of SPR uncore support */ diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c new file mode 100644 index 000000000..c65d8906c --- /dev/null +++ b/arch/x86/events/msr.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "probe.h" + +enum perf_msr_id { + PERF_MSR_TSC = 0, + PERF_MSR_APERF = 1, + PERF_MSR_MPERF = 2, + PERF_MSR_PPERF = 3, + PERF_MSR_SMI = 4, + PERF_MSR_PTSC = 5, + PERF_MSR_IRPERF = 6, + PERF_MSR_THERM = 7, + PERF_MSR_EVENT_MAX, +}; + +static bool test_aperfmperf(int idx, void *data) +{ + return boot_cpu_has(X86_FEATURE_APERFMPERF); +} + +static bool test_ptsc(int idx, void *data) +{ + return boot_cpu_has(X86_FEATURE_PTSC); +} + +static bool test_irperf(int idx, void *data) +{ + return boot_cpu_has(X86_FEATURE_IRPERF); +} + +static bool test_therm_status(int idx, void *data) +{ + return boot_cpu_has(X86_FEATURE_DTHERM); +} + +static bool test_intel(int idx, void *data) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + + case INTEL_FAM6_HASWELL: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_L: + case INTEL_FAM6_HASWELL_G: + + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: + + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_D: + case INTEL_FAM6_ATOM_AIRMONT: + + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: + case INTEL_FAM6_ATOM_TREMONT_L: + + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + if (idx == PERF_MSR_SMI) + return true; + break; + + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_ALDERLAKE_N: + case INTEL_FAM6_RAPTORLAKE: + case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) + return true; + break; + } + + return false; +} + +PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" ); + +static unsigned long msr_mask; + +PMU_EVENT_GROUP(events, aperf); +PMU_EVENT_GROUP(events, mperf); +PMU_EVENT_GROUP(events, pperf); +PMU_EVENT_GROUP(events, smi); +PMU_EVENT_GROUP(events, ptsc); +PMU_EVENT_GROUP(events, irperf); + +static struct attribute *attrs_therm[] = { + &attr_therm.attr.attr, + &attr_therm_snap.attr.attr, + &attr_therm_unit.attr.attr, + NULL, +}; + +static struct attribute_group group_therm = { + .name = "events", + .attrs = attrs_therm, +}; + +static struct perf_msr msr[] = { + [PERF_MSR_TSC] = { .no_check = true, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, }, +}; + +static struct attribute *events_attrs[] = { + &attr_tsc.attr.attr, + NULL, +}; + +static struct attribute_group events_attr_group = { + .name = "events", + .attrs = events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); +static struct attribute *format_attrs[] = { + &format_attr_event.attr, + NULL, +}; +static struct attribute_group format_attr_group = { + .name = "format", + .attrs = format_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + &events_attr_group, + &format_attr_group, + NULL, +}; + +static const struct attribute_group *attr_update[] = { + &group_aperf, + &group_mperf, + &group_pperf, + &group_smi, + &group_ptsc, + &group_irperf, + &group_therm, + NULL, +}; + +static int msr_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + + if (!(msr_mask & (1 << cfg))) + return -EINVAL; + + event->hw.idx = -1; + event->hw.event_base = msr[cfg].msr; + event->hw.config = cfg; + + return 0; +} + +static inline u64 msr_read_counter(struct perf_event *event) +{ + u64 now; + + if (event->hw.event_base) + rdmsrl(event->hw.event_base, now); + else + now = rdtsc_ordered(); + + return now; +} + +static void msr_event_update(struct perf_event *event) +{ + u64 prev, now; + s64 delta; + + /* Careful, an NMI might modify the previous event value: */ +again: + prev = local64_read(&event->hw.prev_count); + now = msr_read_counter(event); + + if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev) + goto again; + + delta = now - prev; + if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { + delta = sign_extend64(delta, 31); + local64_add(delta, &event->count); + } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) { + /* If valid, extract digital readout, otherwise set to -1: */ + now = now & (1ULL << 31) ? (now >> 16) & 0x3f : -1; + local64_set(&event->count, now); + } else { + local64_add(delta, &event->count); + } +} + +static void msr_event_start(struct perf_event *event, int flags) +{ + u64 now = msr_read_counter(event); + + local64_set(&event->hw.prev_count, now); +} + +static void msr_event_stop(struct perf_event *event, int flags) +{ + msr_event_update(event); +} + +static void msr_event_del(struct perf_event *event, int flags) +{ + msr_event_stop(event, PERF_EF_UPDATE); +} + +static int msr_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + msr_event_start(event, flags); + + return 0; +} + +static struct pmu pmu_msr = { + .task_ctx_nr = perf_sw_context, + .attr_groups = attr_groups, + .event_init = msr_event_init, + .add = msr_event_add, + .del = msr_event_del, + .start = msr_event_start, + .stop = msr_event_stop, + .read = msr_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = attr_update, +}; + +static int __init msr_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_TSC)) { + pr_cont("no MSR PMU driver.\n"); + return 0; + } + + msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL); + + perf_pmu_register(&pmu_msr, "msr", -1); + + return 0; +} +device_initcall(msr_init); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h new file mode 100644 index 000000000..332d2e6d8 --- /dev/null +++ b/arch/x86/events/perf_event.h @@ -0,0 +1,1669 @@ +/* + * Performance events x86 architecture header + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * Copyright (C) 2009 Google, Inc., Stephane Eranian + * + * For licencing details see kernel-base/COPYING + */ + +#include + +#include +#include +#include + +/* To enable MSR tracing please use the generic trace points. */ + +/* + * | NHM/WSM | SNB | + * register ------------------------------- + * | HT | no HT | HT | no HT | + *----------------------------------------- + * offcore | core | core | cpu | core | + * lbr_sel | core | core | cpu | core | + * ld_lat | cpu | core | cpu | core | + *----------------------------------------- + * + * Given that there is a small number of shared regs, + * we can pre-allocate their slot in the per-cpu + * per-core reg tables. + */ +enum extra_reg_type { + EXTRA_REG_NONE = -1, /* not used */ + + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ + + EXTRA_REG_MAX /* number of entries needed */ +}; + +struct event_constraint { + union { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 idxmsk64; + }; + u64 code; + u64 cmask; + int weight; + int overlap; + int flags; + unsigned int size; +}; + +static inline bool constraint_match(struct event_constraint *c, u64 ecode) +{ + return ((ecode & c->cmask) - c->code) <= (u64)c->size; +} + +#define PERF_ARCH(name, val) \ + PERF_X86_EVENT_##name = val, + +/* + * struct hw_perf_event.flags flags + */ +enum { +#include "perf_event_flags.h" +}; + +#undef PERF_ARCH + +#define PERF_ARCH(name, val) \ + static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \ + PERF_X86_EVENT_##name); + +#include "perf_event_flags.h" + +#undef PERF_ARCH + +static inline bool is_topdown_count(struct perf_event *event) +{ + return event->hw.flags & PERF_X86_EVENT_TOPDOWN; +} + +static inline bool is_metric_event(struct perf_event *event) +{ + u64 config = event->attr.config; + + return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) && + ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) && + ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX); +} + +static inline bool is_slots_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS; +} + +static inline bool is_topdown_event(struct perf_event *event) +{ + return is_metric_event(event) || is_slots_event(event); +} + +struct amd_nb { + int nb_id; /* NorthBridge id */ + int refcnt; /* reference count */ + struct perf_event *owners[X86_PMC_IDX_MAX]; + struct event_constraint event_constraints[X86_PMC_IDX_MAX]; +}; + +#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) +#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60) +#define PEBS_OUTPUT_OFFSET 61 +#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET) +#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET) +#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD) + +/* + * Flags PEBS can handle without an PMI. + * + * TID can only be handled by flushing at context switch. + * REGS_USER can be handled for events limited to ring 3. + * + */ +#define LARGE_PEBS_FLAGS \ + (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_WEIGHT_TYPE) + +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) + +/* + * Per register state. + */ +struct er_account { + raw_spinlock_t lock; /* per-core: protect structure */ + u64 config; /* extra MSR config */ + u64 reg; /* extra MSR number */ + atomic_t ref; /* reference count */ +}; + +/* + * Per core/cpu state + * + * Used to coordinate shared registers between HT threads or + * among events on a single PMU. + */ +struct intel_shared_regs { + struct er_account regs[EXTRA_REG_MAX]; + int refcnt; /* per-core: #HT threads */ + unsigned core_id; /* per-core: core id */ +}; + +enum intel_excl_state_type { + INTEL_EXCL_UNUSED = 0, /* counter is unused */ + INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */ + INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */ +}; + +struct intel_excl_states { + enum intel_excl_state_type state[X86_PMC_IDX_MAX]; + bool sched_started; /* true if scheduling has started */ +}; + +struct intel_excl_cntrs { + raw_spinlock_t lock; + + struct intel_excl_states states[2]; + + union { + u16 has_exclusive[2]; + u32 exclusive_present; + }; + + int refcnt; /* per-core: #HT threads */ + unsigned core_id; /* per-core: core id */ +}; + +struct x86_perf_task_context; +#define MAX_LBR_ENTRIES 32 + +enum { + LBR_FORMAT_32 = 0x00, + LBR_FORMAT_LIP = 0x01, + LBR_FORMAT_EIP = 0x02, + LBR_FORMAT_EIP_FLAGS = 0x03, + LBR_FORMAT_EIP_FLAGS2 = 0x04, + LBR_FORMAT_INFO = 0x05, + LBR_FORMAT_TIME = 0x06, + LBR_FORMAT_INFO2 = 0x07, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2, +}; + +enum { + X86_PERF_KFREE_SHARED = 0, + X86_PERF_KFREE_EXCL = 1, + X86_PERF_KFREE_MAX +}; + +struct cpu_hw_events { + /* + * Generic x86 PMC bits + */ + struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int enabled; + + int n_events; /* the # of events in the below arrays */ + int n_added; /* the # last events in the below arrays; + they've never been enabled yet */ + int n_txn; /* the # last events in the below arrays; + added in the current transaction */ + int n_txn_pair; + int n_txn_metric; + int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + u64 tags[X86_PMC_IDX_MAX]; + + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; + + int n_excl; /* the number of exclusive events */ + + unsigned int txn_flags; + int is_fake; + + /* + * Intel DebugStore bits + */ + struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; + u64 pebs_enabled; + int n_pebs; + int n_large_pebs; + int n_pebs_via_pt; + int pebs_output; + + /* Current super set of events hardware configuration */ + u64 pebs_data_cfg; + u64 active_pebs_data_cfg; + int pebs_record_size; + + /* Intel Fixed counter configuration */ + u64 fixed_ctrl_val; + u64 active_fixed_ctrl_val; + + /* + * Intel LBR bits + */ + int lbr_users; + int lbr_pebs_users; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + union { + struct er_account *lbr_sel; + struct er_account *lbr_ctl; + }; + u64 br_sel; + void *last_task_ctx; + int last_log_id; + int lbr_select; + void *lbr_xsave; + + /* + * Intel host/guest exclude bits + */ + u64 intel_ctrl_guest_mask; + u64 intel_ctrl_host_mask; + struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; + + /* + * Intel checkpoint mask + */ + u64 intel_cp_status; + + /* + * manage shared (per-core, per-cpu) registers + * used on Intel NHM/WSM/SNB + */ + struct intel_shared_regs *shared_regs; + /* + * manage exclusive counter access between hyperthread + */ + struct event_constraint *constraint_list; /* in enable order */ + struct intel_excl_cntrs *excl_cntrs; + int excl_thread_id; /* 0 or 1 */ + + /* + * SKL TSX_FORCE_ABORT shadow + */ + u64 tfa_shadow; + + /* + * Perf Metrics + */ + /* number of accepted metrics events */ + int n_metric; + + /* + * AMD specific bits + */ + struct amd_nb *amd_nb; + int brs_active; /* BRS is enabled */ + + /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ + u64 perf_ctr_virt_mask; + int n_pair; /* Large increment events */ + + void *kfree_on_online[X86_PERF_KFREE_MAX]; + + struct pmu *pmu; +}; + +#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ + { .idxmsk64 = (n) }, \ + .code = (c), \ + .size = (e) - (c), \ + .cmask = (m), \ + .weight = (w), \ + .overlap = (o), \ + .flags = f, \ +} + +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \ + __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f) + +#define EVENT_CONSTRAINT(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) + +/* + * The constraint_match() function only works for 'simple' event codes + * and not for extended (AMD64_EVENTSEL_EVENT) events codes. + */ +#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \ + __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0) + +#define INTEL_EXCLEVT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ + 0, PERF_X86_EVENT_EXCL) + +/* + * The overlap flag marks event constraints with overlapping counter + * masks. This is the case if the counter mask of such an event is not + * a subset of any other counter mask of a constraint with an equal or + * higher weight, e.g.: + * + * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); + * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); + * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); + * + * The event scheduler may not select the correct counter in the first + * cycle because it needs to know which subsequent events will be + * scheduled. It may fail to schedule the events then. So we set the + * overlap flag for such constraints to give the scheduler a hint which + * events to select for counter rescheduling. + * + * Care must be taken as the rescheduling algorithm is O(n!) which + * will increase scheduling cycles for an over-committed system + * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros + * and its counter masks must be kept at a minimum. + */ +#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0) + +/* + * Constraint on the Event code. + */ +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) + +/* + * Constraint on a range of Event codes + */ +#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT) + +/* + * Constraint on the Event code + UMask + fixed-mask + * + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * - in_tx + * - in_tx_checkpointed + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED) +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) + +/* + * The special metric counters do not actually exist. They are calculated from + * the combination of the FxCtr3 + MSR_PERF_METRICS. + * + * The special metric counters are mapped to a dummy offset for the scheduler. + * The sharing between multiple users of the same metric without multiplexing + * is not allowed, even though the hardware supports that in principle. + */ + +#define METRIC_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \ + INTEL_ARCH_EVENT_MASK) + +/* + * Constraint on the Event code + UMask + */ +#define INTEL_UEVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + +/* Constraint on specific umask bit only + event */ +#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c)) + +/* Like UEVENT_CONSTRAINT, but match flags too */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_EXCL) + +#define INTEL_PLD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) + +#define INTEL_PSD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT) + +#define INTEL_PST_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) + +#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) + +/* Event constraint, but match on all event flags too. */ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) + +#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Check flags and event code, and set the HSW store flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) + +/* Check flags and event code, and set the HSW load flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \ + __EVENT_CONSTRAINT_RANGE(code, end, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) + +/* Check flags and event code/umask, and set the HSW store flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) + +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL) + +/* Check flags and event code/umask, and set the HSW load flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) + +/* Check flags and event code/umask, and set the HSW N/A flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) + + +/* + * We define the end marker as having a weight of -1 + * to enable blacklisting of events using a counter bitmask + * of zero and thus a weight of zero. + * The end marker has a weight that cannot possibly be + * obtained from counting the bits in the bitmask. + */ +#define EVENT_CONSTRAINT_END { .weight = -1 } + +/* + * Check for end marker with weight == -1 + */ +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->weight != -1; (e)++) + +/* + * Extra registers for specific events. + * + * Some events need large masks and require external MSRs. + * Those extra MSRs end up being shared for all events on + * a PMU and sometimes between PMU of sibling HT threads. + * In either case, the kernel needs to handle conflicting + * accesses to those extra, shared, regs. The data structure + * to manage those registers is stored in cpu_hw_event. + */ +struct extra_reg { + unsigned int event; + unsigned int msr; + u64 config_mask; + u64 valid_mask; + int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; +}; + +#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ + } + +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) + +#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK, vm, idx) + +#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \ + INTEL_UEVENT_EXTRA_REG(c, \ + MSR_PEBS_LD_LAT_THRESHOLD, \ + 0xffff, \ + LDLAT) + +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) + +union perf_capabilities { + struct { + u64 lbr_format:6; + u64 pebs_trap:1; + u64 pebs_arch_reg:1; + u64 pebs_format:4; + u64 smm_freeze:1; + /* + * PMU supports separate counter range for writing + * values > 32bit. + */ + u64 full_width_write:1; + u64 pebs_baseline:1; + u64 perf_metrics:1; + u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; + }; + u64 capabilities; +}; + +struct x86_pmu_quirk { + struct x86_pmu_quirk *next; + void (*func)(void); +}; + +union x86_pmu_config { + struct { + u64 event:8, + umask:8, + usr:1, + os:1, + edge:1, + pc:1, + interrupt:1, + __reserved1:1, + en:1, + inv:1, + cmask:8, + event2:4, + __reserved2:4, + go:1, + ho:1; + } bits; + u64 value; +}; + +#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value + +enum { + x86_lbr_exclusive_lbr, + x86_lbr_exclusive_bts, + x86_lbr_exclusive_pt, + x86_lbr_exclusive_max, +}; + +#define PERF_PEBS_DATA_SOURCE_MAX 0x10 + +struct x86_hybrid_pmu { + struct pmu pmu; + const char *name; + u8 cpu_type; + cpumask_t supported_cpus; + union perf_capabilities intel_cap; + u64 intel_ctrl; + int max_pebs_events; + int num_counters; + int num_counters_fixed; + struct event_constraint unconstrained; + + u64 hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + u64 hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct event_constraint *event_constraints; + struct event_constraint *pebs_constraints; + struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; + + u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX]; +}; + +static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) +{ + return container_of(pmu, struct x86_hybrid_pmu, pmu); +} + +extern struct static_key_false perf_is_hybrid; +#define is_hybrid() static_branch_unlikely(&perf_is_hybrid) + +#define hybrid(_pmu, _field) \ +(*({ \ + typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = &hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +})) + +#define hybrid_var(_pmu, _var) \ +(*({ \ + typeof(&_var) __Fp = &_var; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = &hybrid_pmu(_pmu)->_var; \ + \ + __Fp; \ +})) + +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + +enum hybrid_pmu_type { + hybrid_big = 0x40, + hybrid_small = 0x20, + + hybrid_big_small = hybrid_big | hybrid_small, +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + +/* + * struct x86_pmu - generic x86 pmu + */ +struct x86_pmu { + /* + * Generic x86 PMC bits + */ + const char *name; + int version; + int (*handle_irq)(struct pt_regs *); + void (*disable_all)(void); + void (*enable_all)(int added); + void (*enable)(struct perf_event *); + void (*disable)(struct perf_event *); + void (*assign)(struct perf_event *event, int idx); + void (*add)(struct perf_event *); + void (*del)(struct perf_event *); + void (*read)(struct perf_event *event); + int (*set_period)(struct perf_event *event); + u64 (*update)(struct perf_event *event); + int (*hw_config)(struct perf_event *event); + int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + unsigned eventsel; + unsigned perfctr; + int (*addr_offset)(int index, bool eventsel); + int (*rdpmc_index)(int index); + u64 (*event_map)(int); + int max_events; + int num_counters; + int num_counters_fixed; + int cntval_bits; + u64 cntval_mask; + union { + unsigned long events_maskl; + unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; + }; + int events_mask_len; + int apic; + u64 max_period; + struct event_constraint * + (*get_event_constraints)(struct cpu_hw_events *cpuc, + int idx, + struct perf_event *event); + + void (*put_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + + void (*start_scheduling)(struct cpu_hw_events *cpuc); + + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); + + void (*stop_scheduling)(struct cpu_hw_events *cpuc); + + struct event_constraint *event_constraints; + struct x86_pmu_quirk *quirks; + void (*limit_period)(struct perf_event *event, s64 *l); + + /* PMI handler bits */ + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; + /* + * sysfs attrs + */ + int attr_rdpmc_broken; + int attr_rdpmc; + struct attribute **format_attrs; + + ssize_t (*events_sysfs_show)(char *page, u64 config); + const struct attribute_group **attr_update; + + unsigned long attr_freeze_on_smi; + + /* + * CPU Hotplug hooks + */ + int (*cpu_prepare)(int cpu); + void (*cpu_starting)(int cpu); + void (*cpu_dying)(int cpu); + void (*cpu_dead)(int cpu); + + void (*check_microcode)(void); + void (*sched_task)(struct perf_event_context *ctx, + bool sched_in); + + /* + * Intel Arch Perfmon v2+ + */ + u64 intel_ctrl; + union perf_capabilities intel_cap; + + /* + * Intel DebugStore bits + */ + unsigned int bts :1, + bts_active :1, + pebs :1, + pebs_active :1, + pebs_broken :1, + pebs_prec_dist :1, + pebs_no_tlb :1, + pebs_no_isolation :1, + pebs_block :1, + pebs_ept :1; + int pebs_record_size; + int pebs_buffer_size; + int max_pebs_events; + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); + struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); + u64 (*pebs_latency_data)(struct perf_event *event, u64 status); + unsigned long large_pebs_flags; + u64 rtm_abort_event; + u64 pebs_capable; + + /* + * Intel LBR + */ + unsigned int lbr_tos, lbr_from, lbr_to, + lbr_info, lbr_nr; /* LBR base regs and size */ + union { + u64 lbr_sel_mask; /* LBR_SELECT valid bits */ + u64 lbr_ctl_mask; /* LBR_CTL valid bits */ + }; + union { + const int *lbr_sel_map; /* lbr_select mappings */ + int *lbr_ctl_map; /* LBR_CTL mappings */ + }; + bool lbr_double_abort; /* duplicated lbr aborts */ + bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + + unsigned int lbr_has_info:1; + unsigned int lbr_has_tsx:1; + unsigned int lbr_from_flags:1; + unsigned int lbr_to_cycles:1; + + /* + * Intel Architectural LBR CPUID Enumeration + */ + unsigned int lbr_depth_mask:8; + unsigned int lbr_deep_c_reset:1; + unsigned int lbr_lip:1; + unsigned int lbr_cpl:1; + unsigned int lbr_filter:1; + unsigned int lbr_call_stack:1; + unsigned int lbr_mispred:1; + unsigned int lbr_timed_lbr:1; + unsigned int lbr_br_type:1; + + void (*lbr_reset)(void); + void (*lbr_read)(struct cpu_hw_events *cpuc); + void (*lbr_save)(void *ctx); + void (*lbr_restore)(void *ctx); + + /* + * Intel PT/LBR/BTS are exclusive + */ + atomic_t lbr_exclusive[x86_lbr_exclusive_max]; + + /* + * Intel perf metrics + */ + int num_topdown_events; + + /* + * perf task context (i.e. struct perf_event_context::task_ctx_data) + * switch helper to bridge calls from perf/core to perf/x86. + * See struct pmu::swap_task_ctx() usage for examples; + */ + void (*swap_task_ctx)(struct perf_event_context *prev, + struct perf_event_context *next); + + /* + * AMD bits + */ + unsigned int amd_nb_constraints : 1; + u64 perf_ctr_pair_en; + + /* + * Extra registers for events + */ + struct extra_reg *extra_regs; + unsigned int flags; + + /* + * Intel host/guest support (KVM) + */ + struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data); + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 period); + + int (*aux_output_match) (struct perf_event *event); + + int (*filter_match)(struct perf_event *event); + /* + * Hybrid support + * + * Most PMU capabilities are the same among different hybrid PMUs. + * The global x86_pmu saves the architecture capabilities, which + * are available for all PMUs. The hybrid_pmu only includes the + * unique capabilities. + */ + int num_hybrid_pmus; + struct x86_hybrid_pmu *hybrid_pmu; + u8 (*get_hybrid_cpu_type) (void); +}; + +struct x86_perf_task_context_opt { + int lbr_callstack_users; + int lbr_stack_state; + int log_id; +}; + +struct x86_perf_task_context { + u64 lbr_sel; + int tos; + int valid_lbrs; + struct x86_perf_task_context_opt opt; + struct lbr_entry lbr[MAX_LBR_ENTRIES]; +}; + +struct x86_perf_task_context_arch_lbr { + struct x86_perf_task_context_opt opt; + struct lbr_entry entries[]; +}; + +/* + * Add padding to guarantee the 64-byte alignment of the state buffer. + * + * The structure is dynamically allocated. The size of the LBR state may vary + * based on the number of LBR registers. + * + * Do not put anything after the LBR state. + */ +struct x86_perf_task_context_arch_lbr_xsave { + struct x86_perf_task_context_opt opt; + + union { + struct xregs_state xsave; + struct { + struct fxregs_state i387; + struct xstate_header header; + struct arch_lbr_state lbr; + } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT))); + }; +}; + +#define x86_add_quirk(func_) \ +do { \ + static struct x86_pmu_quirk __quirk __initdata = { \ + .func = func_, \ + }; \ + __quirk.next = x86_pmu.quirks; \ + x86_pmu.quirks = &__quirk; \ +} while (0) + +/* + * x86_pmu flags + */ +#define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */ +#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */ +#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ +#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ +#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ +#define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ +#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ +#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ + +#define EVENT_VAR(_id) event_attr_##_id +#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + +#define EVENT_ATTR(_name, _id) \ +static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = PERF_COUNT_HW_##_id, \ + .event_str = NULL, \ +}; + +#define EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + +#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu) \ +static struct perf_pmu_events_hybrid_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\ + .id = 0, \ + .event_str = str, \ + .pmu_type = _pmu, \ +} + +#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr) + +#define FORMAT_ATTR_HYBRID(_name, _pmu) \ +static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ + .attr = __ATTR_RO(_name), \ + .pmu_type = _pmu, \ +} + +struct pmu *x86_get_pmu(unsigned int cpu); +extern struct x86_pmu x86_pmu __read_mostly; + +DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); +DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); + +static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt; + + return &((struct x86_perf_task_context *)ctx)->opt; +} + +static inline bool x86_pmu_has_lbr_callstack(void) +{ + return x86_pmu.lbr_sel_map && + x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; +} + +DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); + +int x86_perf_event_set_period(struct perf_event *event); + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event + * ID. + */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +extern u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +extern u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +u64 x86_perf_event_update(struct perf_event *event); + +static inline unsigned int x86_pmu_config_addr(int index) +{ + return x86_pmu.eventsel + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, true) : index); +} + +static inline unsigned int x86_pmu_event_addr(int index) +{ + return x86_pmu.perfctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); +} + +static inline int x86_pmu_rdpmc_index(int index) +{ + return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; +} + +bool check_hw_exists(struct pmu *pmu, int num_counters, + int num_counters_fixed); + +int x86_add_exclusive(unsigned int what); + +void x86_del_exclusive(unsigned int what); + +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + +int x86_pmu_max_precise(void); + +void hw_perf_lbr_event_destroy(struct perf_event *event); + +int x86_setup_perfctr(struct perf_event *event); + +int x86_pmu_hw_config(struct perf_event *event); + +void x86_pmu_disable_all(void); + +static inline bool has_amd_brs(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_AMD_BRS; +} + +static inline bool is_counter_pair(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_PAIR; +} + +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) +{ + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); + + if (hwc->extra_reg.reg) + wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + + /* + * Add enabled Merge event on next counter + * if large increment event being enabled on this counter + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + + wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); +} + +void x86_pmu_enable_all(int added); + +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign); +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); + +void x86_pmu_stop(struct perf_event *event, int flags); + +static inline void x86_pmu_disable_event(struct perf_event *event) +{ + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); + + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); +} + +void x86_pmu_enable_event(struct perf_event *event); + +int x86_pmu_handle_irq(struct pt_regs *regs); + +void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, + u64 intel_ctrl); + +void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu); + +extern struct event_constraint emptyconstraint; + +extern struct event_constraint unconstrained; + +static inline bool kernel_ip(unsigned long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + +/* + * Not all PMUs provide the right context information to place the reported IP + * into full context. Specifically segment registers are typically not + * supplied. + * + * Assuming the address is a linear address (it is for IBS), we fake the CS and + * vm86 mode using the known zero-based code segment and 'fix up' the registers + * to reflect this. + * + * Intel PEBS/LBR appear to typically provide the effective address, nothing + * much we can do about that but pray and treat it like a linear address. + */ +static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) +{ + regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS; + if (regs->flags & X86_VM_MASK) + regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK); + regs->ip = ip; +} + +/* + * x86control flow change classification + * x86control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ + X86_BR_CALL_STACK = 1 << 16,/* call stack */ + X86_BR_IND_JMP = 1 << 17,/* indirect jump */ + + X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ + +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) +#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_ABORT |\ + X86_BR_IND_CALL |\ + X86_BR_IND_JMP |\ + X86_BR_ZERO_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +int common_branch_type(int type); +int branch_type(unsigned long from, unsigned long to, int abort); +int branch_type_fused(unsigned long from, unsigned long to, int abort, + int *offset); + +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); +ssize_t intel_event_sysfs_show(char *page, u64 config); + +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); +ssize_t events_hybrid_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); + +static inline bool fixed_counter_disabled(int i, struct pmu *pmu) +{ + u64 intel_ctrl = hybrid(pmu, intel_ctrl); + + return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED)); +} + +#ifdef CONFIG_CPU_SUP_AMD + +int amd_pmu_init(void); + +int amd_pmu_lbr_init(void); +void amd_pmu_lbr_reset(void); +void amd_pmu_lbr_read(void); +void amd_pmu_lbr_add(struct perf_event *event); +void amd_pmu_lbr_del(struct perf_event *event); +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void amd_pmu_lbr_enable_all(void); +void amd_pmu_lbr_disable_all(void); +int amd_pmu_lbr_hw_config(struct perf_event *event); + +#ifdef CONFIG_PERF_EVENTS_AMD_BRS + +#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ + +int amd_brs_init(void); +void amd_brs_disable(void); +void amd_brs_enable(void); +void amd_brs_enable_all(void); +void amd_brs_disable_all(void); +void amd_brs_drain(void); +void amd_brs_lopwr_init(void); +void amd_brs_disable_all(void); +int amd_brs_hw_config(struct perf_event *event); +void amd_brs_reset(void); + +static inline void amd_pmu_brs_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + perf_sched_cb_inc(event->ctx->pmu); + cpuc->lbr_users++; + /* + * No need to reset BRS because it is reset + * on brs_enable() and it is saturating + */ +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + + perf_sched_cb_dec(event->ctx->pmu); +} + +void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in); +#else +static inline int amd_brs_init(void) +{ + return 0; +} +static inline void amd_brs_disable(void) {} +static inline void amd_brs_enable(void) {} +static inline void amd_brs_drain(void) {} +static inline void amd_brs_lopwr_init(void) {} +static inline void amd_brs_disable_all(void) {} +static inline int amd_brs_hw_config(struct perf_event *event) +{ + return 0; +} +static inline void amd_brs_reset(void) {} + +static inline void amd_pmu_brs_add(struct perf_event *event) +{ +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ +} + +static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ +} + +static inline void amd_brs_enable_all(void) +{ +} + +#endif + +#else /* CONFIG_CPU_SUP_AMD */ + +static inline int amd_pmu_init(void) +{ + return 0; +} + +static inline int amd_brs_init(void) +{ + return -EOPNOTSUPP; +} + +static inline void amd_brs_drain(void) +{ +} + +static inline void amd_brs_enable_all(void) +{ +} + +static inline void amd_brs_disable_all(void) +{ +} +#endif /* CONFIG_CPU_SUP_AMD */ + +static inline int is_pebs_pt(struct perf_event *event) +{ + return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT); +} + +#ifdef CONFIG_CPU_SUP_INTEL + +static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + + return hw_event == bts_event && period == 1; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return intel_pmu_has_bts_period(event, hwc->sample_period); +} + +static __always_inline void __intel_pmu_pebs_disable_all(void) +{ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); +} + +static __always_inline void __intel_pmu_arch_lbr_disable(void) +{ + wrmsrl(MSR_ARCH_LBR_CTL, 0); +} + +static __always_inline void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +int intel_pmu_save_and_restart(struct perf_event *event); + +struct event_constraint * +x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event); + +extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu); +extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); + +int intel_pmu_init(void); + +void init_debug_store_on_cpu(int cpu); + +void fini_debug_store_on_cpu(int cpu); + +void release_ds_buffers(void); + +void reserve_ds_buffers(void); + +void release_lbr_buffers(void); + +void reserve_lbr_buffers(void); + +extern struct event_constraint bts_constraint; +extern struct event_constraint vlbr_constraint; + +void intel_pmu_enable_bts(u64 config); + +void intel_pmu_disable_bts(void); + +int intel_pmu_drain_bts_buffer(void); + +u64 adl_latency_data_small(struct perf_event *event, u64 status); + +extern struct event_constraint intel_core2_pebs_event_constraints[]; + +extern struct event_constraint intel_atom_pebs_event_constraints[]; + +extern struct event_constraint intel_slm_pebs_event_constraints[]; + +extern struct event_constraint intel_glm_pebs_event_constraints[]; + +extern struct event_constraint intel_glp_pebs_event_constraints[]; + +extern struct event_constraint intel_grt_pebs_event_constraints[]; + +extern struct event_constraint intel_nehalem_pebs_event_constraints[]; + +extern struct event_constraint intel_westmere_pebs_event_constraints[]; + +extern struct event_constraint intel_snb_pebs_event_constraints[]; + +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + +extern struct event_constraint intel_hsw_pebs_event_constraints[]; + +extern struct event_constraint intel_bdw_pebs_event_constraints[]; + +extern struct event_constraint intel_skl_pebs_event_constraints[]; + +extern struct event_constraint intel_icl_pebs_event_constraints[]; + +extern struct event_constraint intel_spr_pebs_event_constraints[]; + +struct event_constraint *intel_pebs_constraints(struct perf_event *event); + +void intel_pmu_pebs_add(struct perf_event *event); + +void intel_pmu_pebs_del(struct perf_event *event); + +void intel_pmu_pebs_enable(struct perf_event *event); + +void intel_pmu_pebs_disable(struct perf_event *event); + +void intel_pmu_pebs_enable_all(void); + +void intel_pmu_pebs_disable_all(void); + +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); + +void intel_pmu_auto_reload_read(struct perf_event *event); + +void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); + +void intel_ds_init(void); + +void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, + struct perf_event_context *next); + +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); + +u64 lbr_from_signext_quirk_wr(u64 val); + +void intel_pmu_lbr_reset(void); + +void intel_pmu_lbr_reset_32(void); + +void intel_pmu_lbr_reset_64(void); + +void intel_pmu_lbr_add(struct perf_event *event); + +void intel_pmu_lbr_del(struct perf_event *event); + +void intel_pmu_lbr_enable_all(bool pmi); + +void intel_pmu_lbr_disable_all(void); + +void intel_pmu_lbr_read(void); + +void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc); + +void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc); + +void intel_pmu_lbr_save(void *ctx); + +void intel_pmu_lbr_restore(void *ctx); + +void intel_pmu_lbr_init_core(void); + +void intel_pmu_lbr_init_nhm(void); + +void intel_pmu_lbr_init_atom(void); + +void intel_pmu_lbr_init_slm(void); + +void intel_pmu_lbr_init_snb(void); + +void intel_pmu_lbr_init_hsw(void); + +void intel_pmu_lbr_init_skl(void); + +void intel_pmu_lbr_init_knl(void); + +void intel_pmu_lbr_init(void); + +void intel_pmu_arch_lbr_init(void); + +void intel_pmu_pebs_data_source_nhm(void); + +void intel_pmu_pebs_data_source_skl(bool pmem); + +void intel_pmu_pebs_data_source_adl(void); + +void intel_pmu_pebs_data_source_grt(void); + +int intel_pmu_setup_lbr_filter(struct perf_event *event); + +void intel_pt_interrupt(void); + +int intel_bts_interrupt(void); + +void intel_bts_enable_local(void); + +void intel_bts_disable_local(void); + +int p4_pmu_init(void); + +int p6_pmu_init(void); + +int knc_pmu_init(void); + +static inline int is_ht_workaround_enabled(void) +{ + return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); +} + +#else /* CONFIG_CPU_SUP_INTEL */ + +static inline void reserve_ds_buffers(void) +{ +} + +static inline void release_ds_buffers(void) +{ +} + +static inline void release_lbr_buffers(void) +{ +} + +static inline void reserve_lbr_buffers(void) +{ +} + +static inline int intel_pmu_init(void) +{ + return 0; +} + +static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) +{ + return 0; +} + +static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc) +{ +} + +static inline int is_ht_workaround_enabled(void) +{ + return 0; +} +#endif /* CONFIG_CPU_SUP_INTEL */ + +#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN)) +int zhaoxin_pmu_init(void); +#else +static inline int zhaoxin_pmu_init(void) +{ + return 0; +} +#endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/ diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h new file mode 100644 index 000000000..1dc19b9b4 --- /dev/null +++ b/arch/x86/events/perf_event_flags.h @@ -0,0 +1,22 @@ + +/* + * struct hw_perf_event.flags flags + */ +PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */ +PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */ +PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */ +PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */ +PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */ +PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */ +PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */ + /* 0x00080 */ +PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */ +PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */ +PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */ +PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */ +PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */ +PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */ +PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */ +PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */ +PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */ +PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */ diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c new file mode 100644 index 000000000..600bf8d15 --- /dev/null +++ b/arch/x86/events/probe.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "probe.h" + +static umode_t +not_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return 0; +} + +/* + * Accepts msr[] array with non populated entries as long as either + * msr[i].msr is 0 or msr[i].grp is NULL. Note that the default sysfs + * visibility is visible when group->is_visible callback is set. + */ +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) +{ + unsigned long avail = 0; + unsigned int bit; + u64 val; + + if (cnt >= BITS_PER_LONG) + return 0; + + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; + u64 mask; + + /* skip entry with no group */ + if (!grp) + continue; + + grp->is_visible = not_visible; + + /* skip unpopulated entry */ + if (!msr[bit].msr) + continue; + + if (msr[bit].test && !msr[bit].test(bit, data)) + continue; + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; + + mask = msr[bit].mask; + if (!mask) + mask = ~0ULL; + /* Disable zero counters if requested. */ + if (!zero && !(val & mask)) + continue; + + grp->is_visible = NULL; + } + avail |= BIT(bit); + } + + return avail; +} +EXPORT_SYMBOL_GPL(perf_msr_probe); diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h new file mode 100644 index 000000000..261b9bda2 --- /dev/null +++ b/arch/x86/events/probe.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_X86_EVENTS_PROBE_H__ +#define __ARCH_X86_EVENTS_PROBE_H__ +#include + +struct perf_msr { + u64 msr; + struct attribute_group *grp; + bool (*test)(int idx, void *data); + bool no_check; + u64 mask; +}; + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data); + +#define __PMU_EVENT_GROUP(_name) \ +static struct attribute *attrs_##_name[] = { \ + &attr_##_name.attr.attr, \ + NULL, \ +} + +#define PMU_EVENT_GROUP(_grp, _name) \ +__PMU_EVENT_GROUP(_name); \ +static struct attribute_group group_##_name = { \ + .name = #_grp, \ + .attrs = attrs_##_name, \ +} + +#endif /* __ARCH_X86_EVENTS_PROBE_H__ */ diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c new file mode 100644 index 000000000..52e6e7ed4 --- /dev/null +++ b/arch/x86/events/rapl.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel/AMD RAPL energy consumption counters + * Copyright (C) 2013 Google, Inc., Stephane Eranian + * + * Intel RAPL interface is specified in the IA-32 Manual Vol3b + * section 14.7.1 (September 2013) + * + * AMD RAPL interface for Fam17h is described in the public PPR: + * https://bugzilla.kernel.org/show_bug.cgi?id=206537 + * + * RAPL provides more controls than just reporting energy consumption + * however here we only expose the 3 energy consumption free running + * counters (pp0, pkg, dram). + * + * Each of those counters increments in a power unit defined by the + * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules + * but it can vary. + * + * Counter to rapl events mappings: + * + * pp0 counter: consumption of all physical cores (power plane 0) + * event: rapl_energy_cores + * perf code: 0x1 + * + * pkg counter: consumption of the whole processor package + * event: rapl_energy_pkg + * perf code: 0x2 + * + * dram counter: consumption of the dram domain (servers only) + * event: rapl_energy_dram + * perf code: 0x3 + * + * gpu counter: consumption of the builtin-gpu domain (client only) + * event: rapl_energy_gpu + * perf code: 0x4 + * + * psys counter: consumption of the builtin-psys domain (client only) + * event: rapl_energy_psys + * perf code: 0x5 + * + * We manage those counters as free running (read-only). They may be + * use simultaneously by other tools, such as turbostat. + * + * The events only support system-wide mode counting. There is no + * sampling support because it does not make sense and is not + * supported by the RAPL hardware. + * + * Because we want to avoid floating-point operations in the kernel, + * the events are all reported in fixed point arithmetic (32.32). + * Tools must adjust the counts to convert them to Watts using + * the duration of the measurement. Tools may use a function such as + * ldexp(raw_count, -32); + */ + +#define pr_fmt(fmt) "RAPL PMU: " fmt + +#include +#include +#include +#include +#include +#include +#include "perf_event.h" +#include "probe.h" + +MODULE_LICENSE("GPL"); + +/* + * RAPL energy status counters + */ +enum perf_rapl_events { + PERF_RAPL_PP0 = 0, /* all cores */ + PERF_RAPL_PKG, /* entire package */ + PERF_RAPL_RAM, /* DRAM */ + PERF_RAPL_PP1, /* gpu */ + PERF_RAPL_PSYS, /* psys */ + + PERF_RAPL_MAX, + NR_RAPL_DOMAINS = PERF_RAPL_MAX, +}; + +static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { + "pp0-core", + "package", + "dram", + "pp1-gpu", + "psys", +}; + +/* + * event code: LSB 8 bits, passed in attr->config + * any other bit is reserved + */ +#define RAPL_EVENT_MASK 0xFFULL +#define RAPL_CNTR_WIDTH 32 + +#define RAPL_EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + +struct rapl_pmu { + raw_spinlock_t lock; + int n_active; + int cpu; + struct list_head active_list; + struct pmu *pmu; + ktime_t timer_interval; + struct hrtimer hrtimer; +}; + +struct rapl_pmus { + struct pmu pmu; + unsigned int maxdie; + struct rapl_pmu *pmus[]; +}; + +enum rapl_unit_quirk { + RAPL_UNIT_QUIRK_NONE, + RAPL_UNIT_QUIRK_INTEL_HSW, + RAPL_UNIT_QUIRK_INTEL_SPR, +}; + +struct rapl_model { + struct perf_msr *rapl_msrs; + unsigned long events; + unsigned int msr_power_unit; + enum rapl_unit_quirk unit_quirk; +}; + + /* 1/2^hw_unit Joule */ +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; +static struct rapl_pmus *rapl_pmus; +static cpumask_t rapl_cpu_mask; +static unsigned int rapl_cntr_mask; +static u64 rapl_timer_ms; +static struct perf_msr *rapl_msrs; + +static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +{ + unsigned int dieid = topology_logical_die_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL; +} + +static inline u64 rapl_read_counter(struct perf_event *event) +{ + u64 raw; + rdmsrl(event->hw.event_base, raw); + return raw; +} + +static inline u64 rapl_scale(u64 v, int cfg) +{ + if (cfg > NR_RAPL_DOMAINS) { + pr_warn("Invalid domain %d, failed to scale data\n", cfg); + return v; + } + /* + * scale delta to smallest unit (1/2^32) + * users must then scale back: count * 1/(1e9*2^32) to get Joules + * or use ldexp(count, -32). + * Watts = Joules/Time delta + */ + return v << (32 - rapl_hw_unit[cfg - 1]); +} + +static u64 rapl_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + s64 delta, sdelta; + int shift = RAPL_CNTR_WIDTH; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + rdmsrl(event->hw.event_base, new_raw_count); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) { + cpu_relax(); + goto again; + } + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + sdelta = rapl_scale(delta, event->hw.config); + + local64_add(sdelta, &event->count); + + return new_raw_count; +} + +static void rapl_start_hrtimer(struct rapl_pmu *pmu) +{ + hrtimer_start(&pmu->hrtimer, pmu->timer_interval, + HRTIMER_MODE_REL_PINNED); +} + +static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); + struct perf_event *event; + unsigned long flags; + + if (!pmu->n_active) + return HRTIMER_NORESTART; + + raw_spin_lock_irqsave(&pmu->lock, flags); + + list_for_each_entry(event, &pmu->active_list, active_entry) + rapl_event_update(event); + + raw_spin_unlock_irqrestore(&pmu->lock, flags); + + hrtimer_forward_now(hrtimer, pmu->timer_interval); + + return HRTIMER_RESTART; +} + +static void rapl_hrtimer_init(struct rapl_pmu *pmu) +{ + struct hrtimer *hr = &pmu->hrtimer; + + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = rapl_hrtimer_handle; +} + +static void __rapl_pmu_event_start(struct rapl_pmu *pmu, + struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + list_add_tail(&event->active_entry, &pmu->active_list); + + local64_set(&event->hw.prev_count, rapl_read_counter(event)); + + pmu->n_active++; + if (pmu->n_active == 1) + rapl_start_hrtimer(pmu); +} + +static void rapl_pmu_event_start(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = event->pmu_private; + unsigned long flags; + + raw_spin_lock_irqsave(&pmu->lock, flags); + __rapl_pmu_event_start(pmu, event); + raw_spin_unlock_irqrestore(&pmu->lock, flags); +} + +static void rapl_pmu_event_stop(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = event->pmu_private; + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + raw_spin_lock_irqsave(&pmu->lock, flags); + + /* mark event as deactivated and stopped */ + if (!(hwc->state & PERF_HES_STOPPED)) { + WARN_ON_ONCE(pmu->n_active <= 0); + pmu->n_active--; + if (pmu->n_active == 0) + hrtimer_cancel(&pmu->hrtimer); + + list_del(&event->active_entry); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + /* check if update of sw counter is necessary */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + rapl_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } + + raw_spin_unlock_irqrestore(&pmu->lock, flags); +} + +static int rapl_pmu_event_add(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = event->pmu_private; + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + raw_spin_lock_irqsave(&pmu->lock, flags); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __rapl_pmu_event_start(pmu, event); + + raw_spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static void rapl_pmu_event_del(struct perf_event *event, int flags) +{ + rapl_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int rapl_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & RAPL_EVENT_MASK; + int bit, ret = 0; + struct rapl_pmu *pmu; + + /* only look at RAPL events */ + if (event->attr.type != rapl_pmus->pmu.type) + return -ENOENT; + + /* check only supported bits are set */ + if (event->attr.config & ~RAPL_EVENT_MASK) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + + if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) + return -EINVAL; + + cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); + bit = cfg - 1; + + /* check event supported */ + if (!(rapl_cntr_mask & (1 << bit))) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* must be done before validate_group */ + pmu = cpu_to_rapl_pmu(event->cpu); + if (!pmu) + return -EINVAL; + event->cpu = pmu->cpu; + event->pmu_private = pmu; + event->hw.event_base = rapl_msrs[bit].msr; + event->hw.config = cfg; + event->hw.idx = bit; + + return ret; +} + +static void rapl_pmu_event_read(struct perf_event *event) +{ + rapl_event_update(event); +} + +static ssize_t rapl_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); + +static struct attribute *rapl_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_attr_group = { + .attrs = rapl_pmu_attrs, +}; + +RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); +RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); +RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); +RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); + +RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); + +/* + * we compute in 0.23 nJ increments regardless of MSR + */ +RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); + +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ +static struct attribute *attrs_empty[] = { + NULL, +}; + +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = attrs_empty, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; + +static const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static struct attribute *rapl_events_cores[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_cores_scale), + NULL, +}; + +static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +}; + +static struct attribute *rapl_events_pkg[] = { + EVENT_PTR(rapl_pkg), + EVENT_PTR(rapl_pkg_unit), + EVENT_PTR(rapl_pkg_scale), + NULL, +}; + +static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +}; + +static struct attribute *rapl_events_ram[] = { + EVENT_PTR(rapl_ram), + EVENT_PTR(rapl_ram_unit), + EVENT_PTR(rapl_ram_scale), + NULL, +}; + +static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, +}; + +static struct attribute *rapl_events_gpu[] = { + EVENT_PTR(rapl_gpu), + EVENT_PTR(rapl_gpu_unit), + EVENT_PTR(rapl_gpu_scale), + NULL, +}; + +static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, +}; + +static struct attribute *rapl_events_psys[] = { + EVENT_PTR(rapl_psys), + EVENT_PTR(rapl_psys_unit), + EVENT_PTR(rapl_psys_scale), + NULL, +}; + +static struct attribute_group rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +}; + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +/* Only lower 32bits of the MSR represents the energy counter */ +#define RAPL_MSR_MASK 0xFFFFFFFF + +static struct perf_msr intel_rapl_msrs[] = { + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK }, +}; + +static struct perf_msr intel_rapl_spr_msrs[] = { + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, true, RAPL_MSR_MASK }, +}; + +/* + * Force to PERF_RAPL_MAX size due to: + * - perf_msr_probe(PERF_RAPL_MAX) + * - want to use same event codes across both architectures + */ +static struct perf_msr amd_rapl_msrs[] = { + [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 }, + [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 }, + [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 }, + [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 }, +}; + +static int rapl_cpu_offline(unsigned int cpu) +{ + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; + + /* Check if exiting cpu is used for collecting rapl events */ + if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) + return 0; + + pmu->cpu = -1; + /* Find a new cpu to collect rapl events */ + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); + + /* Migrate rapl events to the new target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &rapl_cpu_mask); + pmu->cpu = target; + perf_pmu_migrate_context(pmu->pmu, cpu, target); + } + return 0; +} + +static int rapl_cpu_online(unsigned int cpu) +{ + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; + + if (!pmu) { + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -ENOMEM; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; + } + + /* + * Check if there is an online cpu in the package which collects rapl + * events already. + */ + target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); + if (target < nr_cpu_ids) + return 0; + + cpumask_set_cpu(cpu, &rapl_cpu_mask); + pmu->cpu = cpu; + return 0; +} + +static int rapl_check_hw_unit(struct rapl_model *rm) +{ + u64 msr_rapl_power_unit_bits; + int i; + + /* protect rdmsrl() to handle virtualization */ + if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits)) + return -1; + for (i = 0; i < NR_RAPL_DOMAINS; i++) + rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + + switch (rm->unit_quirk) { + /* + * DRAM domain on HSW server and KNL has fixed energy unit which can be + * different than the unit from power unit MSR. See + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 + * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + */ + case RAPL_UNIT_QUIRK_INTEL_HSW: + rapl_hw_unit[PERF_RAPL_RAM] = 16; + break; + /* SPR uses a fixed energy unit for Psys domain. */ + case RAPL_UNIT_QUIRK_INTEL_SPR: + rapl_hw_unit[PERF_RAPL_PSYS] = 0; + break; + default: + break; + } + + + /* + * Calculate the timer rate: + * Use reference of 200W for scaling the timeout to avoid counter + * overflows. 200W = 200 Joules/sec + * Divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + rapl_timer_ms = 2; + if (rapl_hw_unit[0] < 32) { + rapl_timer_ms = (1000 / (2 * 100)); + rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + } + return 0; +} + +static void __init rapl_advertise(void) +{ + int i; + + pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", + hweight32(rapl_cntr_mask), rapl_timer_ms); + + for (i = 0; i < NR_RAPL_DOMAINS; i++) { + if (rapl_cntr_mask & (1 << i)) { + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_domain_names[i], rapl_hw_unit[i]); + } + } +} + +static void cleanup_rapl_pmus(void) +{ + int i; + + for (i = 0; i < rapl_pmus->maxdie; i++) + kfree(rapl_pmus->pmus[i]); + kfree(rapl_pmus); +} + +static const struct attribute_group *rapl_attr_update[] = { + &rapl_events_cores_group, + &rapl_events_pkg_group, + &rapl_events_ram_group, + &rapl_events_gpu_group, + &rapl_events_psys_group, + NULL, +}; + +static int __init init_rapl_pmus(void) +{ + int maxdie = topology_max_packages() * topology_max_die_per_package(); + size_t size; + + size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); + rapl_pmus = kzalloc(size, GFP_KERNEL); + if (!rapl_pmus) + return -ENOMEM; + + rapl_pmus->maxdie = maxdie; + rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.attr_update = rapl_attr_update; + rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; + rapl_pmus->pmu.event_init = rapl_pmu_event_init; + rapl_pmus->pmu.add = rapl_pmu_event_add; + rapl_pmus->pmu.del = rapl_pmu_event_del; + rapl_pmus->pmu.start = rapl_pmu_event_start; + rapl_pmus->pmu.stop = rapl_pmu_event_stop; + rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.module = THIS_MODULE; + rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + return 0; +} + +static struct rapl_model model_snb = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_PP1), + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_snbep = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_hsw = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1), + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_hsx = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_knl = { + .events = BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_skl = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1) | + BIT(PERF_RAPL_PSYS), + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_spr = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PSYS), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_spr_msrs, +}; + +static struct rapl_model model_amd_hygon = { + .events = BIT(PERF_RAPL_PKG), + .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, + .rapl_msrs = amd_rapl_msrs, +}; + +static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), + {}, +}; +MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); + +static int __init rapl_pmu_init(void) +{ + const struct x86_cpu_id *id; + struct rapl_model *rm; + int ret; + + id = x86_match_cpu(rapl_model_match); + if (!id) + return -ENODEV; + + rm = (struct rapl_model *) id->driver_data; + + rapl_msrs = rm->rapl_msrs; + + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, + false, (void *) &rm->events); + + ret = rapl_check_hw_unit(rm); + if (ret) + return ret; + + ret = init_rapl_pmus(); + if (ret) + return ret; + + /* + * Install callbacks. Core will call them for each online cpu. + */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, + "perf/x86/rapl:online", + rapl_cpu_online, rapl_cpu_offline); + if (ret) + goto out; + + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + if (ret) + goto out1; + + rapl_advertise(); + return 0; + +out1: + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); +out: + pr_warn("Initialization failed (%d), disabled\n", ret); + cleanup_rapl_pmus(); + return ret; +} +module_init(rapl_pmu_init); + +static void __exit intel_rapl_exit(void) +{ + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); + perf_pmu_unregister(&rapl_pmus->pmu); + cleanup_rapl_pmus(); +} +module_exit(intel_rapl_exit); diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c new file mode 100644 index 000000000..dab4ed199 --- /dev/null +++ b/arch/x86/events/utils.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "perf_event.h" + +static int decode_branch_type(struct insn *insn) +{ + int ext; + + if (insn_get_opcode(insn)) + return X86_BR_ABORT; + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + return X86_BR_SYSCALL; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + return X86_BR_SYSRET; + case 0x80 ... 0x8f: /* conditional */ + return X86_BR_JCC; + } + return X86_BR_NONE; + case 0x70 ... 0x7f: /* conditional */ + return X86_BR_JCC; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + return X86_BR_RET; + case 0xcf: /* iret */ + return X86_BR_IRET; + case 0xcc ... 0xce: /* int */ + return X86_BR_INT; + case 0xe8: /* call near rel */ + if (insn_get_immediate(insn) || insn->immediate1.value == 0) { + /* zero length call */ + return X86_BR_ZERO_CALL; + } + fallthrough; + case 0x9a: /* call far absolute */ + return X86_BR_CALL; + case 0xe0 ... 0xe3: /* loop jmp */ + return X86_BR_JCC; + case 0xe9 ... 0xeb: /* jmp */ + return X86_BR_JMP; + case 0xff: /* call near absolute, call far absolute ind */ + if (insn_get_modrm(insn)) + return X86_BR_ABORT; + + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + return X86_BR_IND_CALL; + case 4: + case 5: + return X86_BR_IND_JMP; + } + return X86_BR_NONE; + } + + return X86_BR_NONE; +} + +/* + * return the type of control flow change at address "from" + * instruction is not necessarily a branch (in case of interrupt). + * + * The branch type returned also includes the priv level of the + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). + * + * If a branch type is unknown OR the instruction cannot be + * decoded (e.g., text page not present), then X86_BR_NONE is + * returned. + * + * While recording branches, some processors can report the "from" + * address to be that of an instruction preceding the actual branch + * when instruction fusion occurs. If fusion is expected, attempt to + * find the type of the first branch instruction within the next + * MAX_INSN_SIZE bytes and if found, provide the offset between the + * reported "from" address and the actual branch instruction address. + */ +static int get_branch_type(unsigned long from, unsigned long to, int abort, + bool fused, int *offset) +{ + struct insn insn; + void *addr; + int bytes_read, bytes_left, insn_offset; + int ret = X86_BR_NONE; + int to_plm, from_plm; + u8 buf[MAX_INSN_SIZE]; + int is64 = 0; + + /* make sure we initialize offset */ + if (offset) + *offset = 0; + + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; + + /* + * maybe zero if lbr did not fill up after a reset by the time + * we get a PMU interrupt + */ + if (from == 0 || to == 0) + return X86_BR_NONE; + + if (abort) + return X86_BR_ABORT | to_plm; + + if (from_plm == X86_BR_USER) { + /* + * can happen if measuring at the user level only + * and we interrupt in a kernel thread, e.g., idle. + */ + if (!current->mm) + return X86_BR_NONE; + + /* may fail if text not present */ + bytes_left = copy_from_user_nmi(buf, (void __user *)from, + MAX_INSN_SIZE); + bytes_read = MAX_INSN_SIZE - bytes_left; + if (!bytes_read) + return X86_BR_NONE; + + addr = buf; + } else { + /* + * The LBR logs any address in the IP, even if the IP just + * faulted. This means userspace can control the from address. + * Ensure we don't blindly read any address by validating it is + * a known text address and not a vsyscall address. + */ + if (kernel_text_address(from) && !in_gate_area_no_mm(from)) { + addr = (void *)from; + /* + * Assume we can get the maximum possible size + * when grabbing kernel data. This is not + * _strictly_ true since we could possibly be + * executing up next to a memory hole, but + * it is very unlikely to be a problem. + */ + bytes_read = MAX_INSN_SIZE; + } else { + return X86_BR_NONE; + } + } + + /* + * decoder needs to know the ABI especially + * on 64-bit systems running 32-bit apps + */ +#ifdef CONFIG_X86_64 + is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs()); +#endif + insn_init(&insn, addr, bytes_read, is64); + ret = decode_branch_type(&insn); + insn_offset = 0; + + /* Check for the possibility of branch fusion */ + while (fused && ret == X86_BR_NONE) { + /* Check for decoding errors */ + if (insn_get_length(&insn) || !insn.length) + break; + + insn_offset += insn.length; + bytes_read -= insn.length; + if (bytes_read < 0) + break; + + insn_init(&insn, addr + insn_offset, bytes_read, is64); + ret = decode_branch_type(&insn); + } + + if (offset) + *offset = insn_offset; + + /* + * interrupts, traps, faults (and thus ring transition) may + * occur on any instructions. Thus, to classify them correctly, + * we need to first look at the from and to priv levels. If they + * are different and to is in the kernel, then it indicates + * a ring transition. If the from instruction is not a ring + * transition instr (syscall, systenter, int), then it means + * it was a irq, trap or fault. + * + * we have no way of detecting kernel to kernel faults. + */ + if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL + && ret != X86_BR_SYSCALL && ret != X86_BR_INT) + ret = X86_BR_IRQ; + + /* + * branch priv level determined by target as + * is done by HW when LBR_SELECT is implemented + */ + if (ret != X86_BR_NONE) + ret |= to_plm; + + return ret; +} + +int branch_type(unsigned long from, unsigned long to, int abort) +{ + return get_branch_type(from, to, abort, false, NULL); +} + +int branch_type_fused(unsigned long from, unsigned long to, int abort, + int *offset) +{ + return get_branch_type(from, to, abort, true, offset); +} + +#define X86_BR_TYPE_MAP_MAX 16 + +static int branch_map[X86_BR_TYPE_MAP_MAX] = { + PERF_BR_CALL, /* X86_BR_CALL */ + PERF_BR_RET, /* X86_BR_RET */ + PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ + PERF_BR_SYSRET, /* X86_BR_SYSRET */ + PERF_BR_UNKNOWN, /* X86_BR_INT */ + PERF_BR_ERET, /* X86_BR_IRET */ + PERF_BR_COND, /* X86_BR_JCC */ + PERF_BR_UNCOND, /* X86_BR_JMP */ + PERF_BR_IRQ, /* X86_BR_IRQ */ + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_ABORT */ + PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ + PERF_BR_NO_TX, /* X86_BR_NO_TX */ + PERF_BR_CALL, /* X86_BR_ZERO_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ + PERF_BR_IND, /* X86_BR_IND_JMP */ +}; + +int common_branch_type(int type) +{ + int i; + + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ + + if (type) { + i = __ffs(type); + if (i < X86_BR_TYPE_MAP_MAX) + return branch_map[i]; + } + + return PERF_BR_UNKNOWN; +} diff --git a/arch/x86/events/zhaoxin/Makefile b/arch/x86/events/zhaoxin/Makefile new file mode 100644 index 000000000..642c1174d --- /dev/null +++ b/arch/x86/events/zhaoxin/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += core.o diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c new file mode 100644 index 000000000..3e9acdaee --- /dev/null +++ b/arch/x86/events/zhaoxin/core.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Zhaoxin PMU; like Intel Architectural PerfMon-v2 + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../perf_event.h" + +/* + * Zhaoxin PerfMon, used on zxc and later. + */ +static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = { + + [PERF_COUNT_HW_CPU_CYCLES] = 0x0082, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515, + [PERF_COUNT_HW_CACHE_MISSES] = 0x051a, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0083, +}; + +static struct event_constraint zxc_event_constraints[] __read_mostly = { + + FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint zxd_event_constraints[] __read_mostly = { + + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */ + FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */ + FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */ + EVENT_CONSTRAINT_END +}; + +static __initconst const u64 zxd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0042, + [C(RESULT_MISS)] = 0x0538, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0043, + [C(RESULT_MISS)] = 0x0562, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0300, + [C(RESULT_MISS)] = 0x0301, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x030a, + [C(RESULT_MISS)] = 0x030b, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0042, + [C(RESULT_MISS)] = 0x052c, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0043, + [C(RESULT_MISS)] = 0x0530, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0564, + [C(RESULT_MISS)] = 0x0565, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, + [C(RESULT_MISS)] = 0x0534, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0700, + [C(RESULT_MISS)] = 0x0709, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + +static __initconst const u64 zxe_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0568, + [C(RESULT_MISS)] = 0x054b, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0669, + [C(RESULT_MISS)] = 0x0562, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0300, + [C(RESULT_MISS)] = 0x0301, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x030a, + [C(RESULT_MISS)] = 0x030b, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0568, + [C(RESULT_MISS)] = 0x052c, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0669, + [C(RESULT_MISS)] = 0x0530, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0564, + [C(RESULT_MISS)] = 0x0565, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, + [C(RESULT_MISS)] = 0x0534, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0028, + [C(RESULT_MISS)] = 0x0029, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + +static void zhaoxin_pmu_disable_all(void) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); +} + +static void zhaoxin_pmu_enable_all(int added) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); +} + +static inline u64 zhaoxin_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void zhaoxin_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void zxc_pmu_ack_status(u64 ack) +{ + /* + * ZXC needs global control enabled in order to clear status bits. + */ + zhaoxin_pmu_enable_all(0); + zhaoxin_pmu_ack_status(ack); + zhaoxin_pmu_disable_all(); +} + +static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc) +{ + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; + u64 ctrl_val, mask; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + wrmsrl(hwc->config_base, ctrl_val); +} + +static void zhaoxin_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + zhaoxin_pmu_disable_fixed(hwc); + return; + } + + x86_pmu_disable_event(event); +} + +static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) +{ + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + + /* + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + wrmsrl(hwc->config_base, ctrl_val); +} + +static void zhaoxin_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + zhaoxin_pmu_enable_fixed(hwc); + return; + } + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int zhaoxin_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int handled = 0; + u64 status; + int bit; + + cpuc = this_cpu_ptr(&cpu_hw_events); + apic_write(APIC_LVTPC, APIC_DM_NMI); + zhaoxin_pmu_disable_all(); + status = zhaoxin_pmu_get_status(); + if (!status) + goto done; + +again: + if (x86_pmu.enabled_ack) + zxc_pmu_ack_status(status); + else + zhaoxin_pmu_ack_status(status); + + inc_irq_stat(apic_perf_irqs); + + /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + x86_perf_event_update(event); + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (!x86_perf_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * Repeat if there is more work to be done: + */ + status = zhaoxin_pmu_get_status(); + if (status) + goto again; + +done: + zhaoxin_pmu_enable_all(0); + return handled; +} + +static u64 zhaoxin_pmu_event_map(int hw_event) +{ + return zx_pmon_event_map[hw_event]; +} + +static struct event_constraint * +zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &unconstrained; +} + +PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(umask, "config:8-15"); +PMU_FORMAT_ATTR(edge, "config:18"); +PMU_FORMAT_ATTR(inv, "config:23"); +PMU_FORMAT_ATTR(cmask, "config:24-31"); + +static struct attribute *zx_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); + + return x86_event_sysfs_show(page, config, event); +} + +static const struct x86_pmu zhaoxin_pmu __initconst = { + .name = "zhaoxin", + .handle_irq = zhaoxin_pmu_handle_irq, + .disable_all = zhaoxin_pmu_disable_all, + .enable_all = zhaoxin_pmu_enable_all, + .enable = zhaoxin_pmu_enable_event, + .disable = zhaoxin_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = zhaoxin_pmu_event_map, + .max_events = ARRAY_SIZE(zx_pmon_event_map), + .apic = 1, + /* + * For zxd/zxe, read/write operation for PMCx MSR is 48 bits. + */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = zhaoxin_get_event_constraints, + + .format_attrs = zx_arch_formats_attr, + .events_sysfs_show = zhaoxin_event_sysfs_show, +}; + +static const struct { int id; char *name; } zx_arch_events_map[] __initconst = { + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, + { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, +}; + +static __init void zhaoxin_arch_events_quirk(void) +{ + int bit; + + /* disable event that reported as not present by cpuid */ + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) { + zx_pmon_event_map[zx_arch_events_map[bit].id] = 0; + pr_warn("CPUID marked event: \'%s\' unavailable\n", + zx_arch_events_map[bit].name); + } +} + +__init int zhaoxin_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + union cpuid10_ebx ebx; + struct event_constraint *c; + unsigned int unused; + int version; + + pr_info("Welcome to zhaoxin pmu!\n"); + + /* + * Check whether the Architectural PerfMon supports + * hw_event or not. + */ + cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + + if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1) + return -ENODEV; + + version = eax.split.version_id; + if (version != 2) + return -ENODEV; + + x86_pmu = zhaoxin_pmu; + pr_info("Version check pass!\n"); + + x86_pmu.version = version; + x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntval_bits = eax.split.bit_width; + x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.events_maskl = ebx.full; + x86_pmu.events_mask_len = eax.split.mask_length; + + x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; + x86_add_quirk(zhaoxin_arch_events_quirk); + + switch (boot_cpu_data.x86) { + case 0x06: + /* + * Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS. + * Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D] + * ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3 + */ + if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) || + boot_cpu_data.x86_model == 0x19) { + + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; + + /* Clearing status works only if the global control is enable on zxc. */ + x86_pmu.enabled_ack = 1; + + x86_pmu.event_constraints = zxc_event_constraints; + zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0; + zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0; + + pr_cont("ZXC events, "); + break; + } + return -ENODEV; + + case 0x07: + zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01); + + zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0); + + switch (boot_cpu_data.x86_model) { + case 0x1b: + memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = zxd_event_constraints; + + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700; + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709; + + pr_cont("ZXD events, "); + break; + case 0x3b: + memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = zxd_event_constraints; + + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028; + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029; + + pr_cont("ZXE events, "); + break; + default: + return -ENODEV; + } + break; + + default: + return -ENODEV; + } + + x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1; + x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + c->weight += x86_pmu.num_counters; + } + } + + return 0; +} + diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile new file mode 100644 index 000000000..5d2de1080 --- /dev/null +++ b/arch/x86/hyperv/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o +obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o + +ifdef CONFIG_X86_64 +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o +endif diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c new file mode 100644 index 000000000..fb8b2c088 --- /dev/null +++ b/arch/x86/hyperv/hv_apic.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific APIC code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static struct apic orig_apic; + +static u64 hv_apic_icr_read(void) +{ + u64 reg_val; + + rdmsrl(HV_X64_MSR_ICR, reg_val); + return reg_val; +} + +static void hv_apic_icr_write(u32 low, u32 id) +{ + u64 reg_val; + + reg_val = SET_XAPIC_DEST_FIELD(id); + reg_val = reg_val << 32; + reg_val |= low; + + wrmsrl(HV_X64_MSR_ICR, reg_val); +} + +static u32 hv_apic_read(u32 reg) +{ + u32 reg_val, hi; + + switch (reg) { + case APIC_EOI: + rdmsr(HV_X64_MSR_EOI, reg_val, hi); + (void)hi; + return reg_val; + case APIC_TASKPRI: + rdmsr(HV_X64_MSR_TPR, reg_val, hi); + (void)hi; + return reg_val; + + default: + return native_apic_mem_read(reg); + } +} + +static void hv_apic_write(u32 reg, u32 val) +{ + switch (reg) { + case APIC_EOI: + wrmsr(HV_X64_MSR_EOI, val, 0); + break; + case APIC_TASKPRI: + wrmsr(HV_X64_MSR_TPR, val, 0); + break; + default: + native_apic_mem_write(reg, val); + } +} + +static void hv_apic_eoi_write(u32 reg, u32 val) +{ + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + + if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1)) + return; + + wrmsr(HV_X64_MSR_EOI, val, 0); +} + +/* + * IPI implementation on Hyper-V. + */ +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, + bool exclude_self) +{ + struct hv_send_ipi_ex **arg; + struct hv_send_ipi_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; + u64 status = HV_STATUS_INVALID_PARAMETER; + + if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return false; + + local_irq_save(flags); + arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_ex_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->vp_set.valid_bank_mask = 0; + + /* + * Use HV_GENERIC_SET_ALL and avoid converting cpumask to VP_SET + * when the IPI is sent to all currently present CPUs. + */ + if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { + ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + if (exclude_self) + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask); + else + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + + /* + * 'nr_bank <= 0' means some CPUs in cpumask can't be + * represented in VP_SET. Return an error and fall back to + * native (architectural) method of sending IPIs. + */ + if (nr_bank <= 0) + goto ipi_mask_ex_done; + } else { + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + } + + status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + +ipi_mask_ex_done: + local_irq_restore(flags); + return hv_result_success(status); +} + +static bool __send_ipi_mask(const struct cpumask *mask, int vector, + bool exclude_self) +{ + int cur_cpu, vcpu, this_cpu = smp_processor_id(); + struct hv_send_ipi ipi_arg; + u64 status; + unsigned int weight; + + trace_hyperv_send_ipi_mask(mask, vector); + + weight = cpumask_weight(mask); + + /* + * Do nothing if + * 1. the mask is empty + * 2. the mask only contains self when exclude_self is true + */ + if (weight == 0 || + (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) + return true; + + if (!hv_hypercall_pg) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + /* + * From the supplied CPU set we need to figure out if we can get away + * with cheaper HVCALL_SEND_IPI hypercall. This is possible when the + * highest VP number in the set is < 64. As VP numbers are usually in + * ascending order and match Linux CPU ids, here is an optimization: + * we check the VP number for the highest bit in the supplied set first + * so we can quickly find out if using HVCALL_SEND_IPI_EX hypercall is + * a must. We will also check all VP numbers when walking the supplied + * CPU set to remain correct in all cases. + */ + if (hv_cpu_number_to_vp_number(cpumask_last(mask)) >= 64) + goto do_ex_hypercall; + + ipi_arg.vector = vector; + ipi_arg.cpu_mask = 0; + + for_each_cpu(cur_cpu, mask) { + if (exclude_self && cur_cpu == this_cpu) + continue; + vcpu = hv_cpu_number_to_vp_number(cur_cpu); + if (vcpu == VP_INVAL) + return false; + + /* + * This particular version of the IPI hypercall can + * only target upto 64 CPUs. + */ + if (vcpu >= 64) + goto do_ex_hypercall; + + __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask); + } + + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, + ipi_arg.cpu_mask); + return hv_result_success(status); + +do_ex_hypercall: + return __send_ipi_mask_ex(mask, vector, exclude_self); +} + +static bool __send_ipi_one(int cpu, int vector) +{ + int vp = hv_cpu_number_to_vp_number(cpu); + u64 status; + + trace_hyperv_send_ipi_one(cpu, vector); + + if (!hv_hypercall_pg || (vp == VP_INVAL)) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + if (vp >= 64) + return __send_ipi_mask_ex(cpumask_of(cpu), vector, false); + + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); + return hv_result_success(status); +} + +static void hv_send_ipi(int cpu, int vector) +{ + if (!__send_ipi_one(cpu, vector)) + orig_apic.send_IPI(cpu, vector); +} + +static void hv_send_ipi_mask(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector, false)) + orig_apic.send_IPI_mask(mask, vector); +} + +static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector, true)) + orig_apic.send_IPI_mask_allbutself(mask, vector); +} + +static void hv_send_ipi_allbutself(int vector) +{ + hv_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void hv_send_ipi_all(int vector) +{ + if (!__send_ipi_mask(cpu_online_mask, vector, false)) + orig_apic.send_IPI_all(vector); +} + +static void hv_send_ipi_self(int vector) +{ + if (!__send_ipi_one(smp_processor_id(), vector)) + orig_apic.send_IPI_self(vector); +} + +void __init hv_apic_init(void) +{ + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { + pr_info("Hyper-V: Using IPI hypercalls\n"); + /* + * Set the IPI entry points. + */ + orig_apic = *apic; + + apic->send_IPI = hv_send_ipi; + apic->send_IPI_mask = hv_send_ipi_mask; + apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = hv_send_ipi_allbutself; + apic->send_IPI_all = hv_send_ipi_all; + apic->send_IPI_self = hv_send_ipi_self; + } + + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { + pr_info("Hyper-V: Using enlightened APIC (%s mode)", + x2apic_enabled() ? "x2apic" : "xapic"); + /* + * When in x2apic mode, don't use the Hyper-V specific APIC + * accessors since the field layout in the ICR register is + * different in x2apic mode. Furthermore, the architectural + * x2apic MSRs function just as well as the Hyper-V + * synthetic APIC MSRs, so there's no benefit in having + * separate Hyper-V accessors for x2apic mode. The only + * exception is hv_apic_eoi_write, because it benefits from + * lazy EOI when available, but the same accessor works for + * both xapic and x2apic because the field layout is the same. + */ + apic_set_eoi_write(hv_apic_eoi_write); + if (!x2apic_enabled()) { + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } + } +} diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c new file mode 100644 index 000000000..c18e5c764 --- /dev/null +++ b/arch/x86/hyperv/hv_init.c @@ -0,0 +1,650 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * X86 specific Hyper-V initialization code. + * + * Copyright (C) 2016, Microsoft, Inc. + * + * Author : K. Y. Srinivasan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int hyperv_init_cpuhp; +u64 hv_current_partition_id = ~0ull; +EXPORT_SYMBOL_GPL(hv_current_partition_id); + +void *hv_hypercall_pg; +EXPORT_SYMBOL_GPL(hv_hypercall_pg); + +union hv_ghcb * __percpu *hv_ghcb_pg; + +/* Storage to save the hypercall page temporarily for hibernation */ +static void *hv_hypercall_pg_saved; + +struct hv_vp_assist_page **hv_vp_assist_page; +EXPORT_SYMBOL_GPL(hv_vp_assist_page); + +static int hyperv_init_ghcb(void) +{ + u64 ghcb_gpa; + void *ghcb_va; + void **ghcb_base; + + if (!hv_isolation_type_snp()) + return 0; + + if (!hv_ghcb_pg) + return -EINVAL; + + /* + * GHCB page is allocated by paravisor. The address + * returned by MSR_AMD64_SEV_ES_GHCB is above shared + * memory boundary and map it here. + */ + rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB); + if (!ghcb_va) + return -ENOMEM; + + ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); + *ghcb_base = ghcb_va; + + return 0; +} + +static int hv_cpu_init(unsigned int cpu) +{ + union hv_vp_assist_msr_contents msr = { 0 }; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + int ret; + + ret = hv_common_cpu_init(cpu); + if (ret) + return ret; + + if (!hv_vp_assist_page) + return 0; + + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + if (!*hvp) + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + } + + return hyperv_init_ghcb(); +} + +static void (*hv_reenlightenment_cb)(void); + +static void hv_reenlightenment_notify(struct work_struct *dummy) +{ + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + /* Don't issue the callback if TSC accesses are not emulated */ + if (hv_reenlightenment_cb && emu_status.inprogress) + hv_reenlightenment_cb(); +} +static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); + +void hyperv_stop_tsc_emulation(void) +{ + u64 freq; + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + emu_status.inprogress = 0; + wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + tsc_khz = div64_u64(freq, 1000); +} +EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); + +static inline bool hv_reenlightenment_available(void) +{ + /* + * Check for required features and privileges to make TSC frequency + * change notifications work. + */ + return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && + ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT; +} + +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment) +{ + ack_APIC_irq(); + inc_irq_stat(irq_hv_reenlightenment_count); + schedule_delayed_work(&hv_reenlightenment_work, HZ/10); +} + +void set_hv_tscchange_cb(void (*cb)(void)) +{ + struct hv_reenlightenment_control re_ctrl = { + .vector = HYPERV_REENLIGHTENMENT_VECTOR, + .enabled = 1, + }; + struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; + + if (!hv_reenlightenment_available()) { + pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + return; + } + + if (!hv_vp_index) + return; + + hv_reenlightenment_cb = cb; + + /* Make sure callback is registered before we write to MSRs */ + wmb(); + + re_ctrl.target_vp = hv_vp_index[get_cpu()]; + + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); + + put_cpu(); +} +EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); + +void clear_hv_tscchange_cb(void) +{ + struct hv_reenlightenment_control re_ctrl; + + if (!hv_reenlightenment_available()) + return; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + re_ctrl.enabled = 0; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + + hv_reenlightenment_cb = NULL; +} +EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); + +static int hv_cpu_die(unsigned int cpu) +{ + struct hv_reenlightenment_control re_ctrl; + unsigned int new_cpu; + void **ghcb_va; + + if (hv_ghcb_pg) { + ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg); + if (*ghcb_va) + memunmap(*ghcb_va); + *ghcb_va = NULL; + } + + hv_common_cpu_die(cpu); + + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) { + union hv_vp_assist_msr_contents msr = { 0 }; + if (hv_root_partition) { + /* + * For root partition the VP assist page is mapped to + * hypervisor provided page, and thus we unmap the + * page here and nullify it, so that in future we have + * correct page address mapped in hv_cpu_init. + */ + memunmap(hv_vp_assist_page[cpu]); + hv_vp_assist_page[cpu] = NULL; + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + msr.enable = 0; + } + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + } + + if (hv_reenlightenment_cb == NULL) + return 0; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + if (re_ctrl.target_vp == hv_vp_index[cpu]) { + /* + * Reassign reenlightenment notifications to some other online + * CPU or just disable the feature if there are no online CPUs + * left (happens on hibernation). + */ + new_cpu = cpumask_any_but(cpu_online_mask, cpu); + + if (new_cpu < nr_cpu_ids) + re_ctrl.target_vp = hv_vp_index[new_cpu]; + else + re_ctrl.enabled = 0; + + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + } + + return 0; +} + +static int __init hv_pci_init(void) +{ + bool gen2vm = efi_enabled(EFI_BOOT); + + /* + * A Generation-2 VM doesn't support legacy PCI/PCIe, so both + * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> + * pcibios_init() doesn't call pcibios_resource_survey() -> + * e820__reserve_resources_late(); as a result, any emulated persistent + * memory of E820_TYPE_PRAM (12) via the kernel parameter + * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be + * detected by register_e820_pmem(). Fix this by directly calling + * e820__reserve_resources_late() here: e820__reserve_resources_late() + * depends on e820__reserve_resources(), which has been called earlier + * from setup_arch(). Note: e820__reserve_resources_late() also adds + * any memory of E820_TYPE_PMEM (7) into iomem_resource, and + * acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> + * region_intersects() returns REGION_INTERSECTS, so the memory of + * E820_TYPE_PMEM won't get added twice. + * + * We return 0 here so that pci_arch_init() won't print the warning: + * "PCI: Fatal: No config space access function found" + */ + if (gen2vm) { + e820__reserve_resources_late(); + return 0; + } + + /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ + return 1; +} + +static int hv_suspend(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + int ret; + + if (hv_root_partition) + return -EPERM; + + /* + * Reset the hypercall page as it is going to be invalidated + * across hibernation. Setting hv_hypercall_pg to NULL ensures + * that any subsequent hypercall operation fails safely instead of + * crashing due to an access of an invalid page. The hypercall page + * pointer is restored on resume. + */ + hv_hypercall_pg_saved = hv_hypercall_pg; + hv_hypercall_pg = NULL; + + /* Disable the hypercall page in the hypervisor */ + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 0; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + ret = hv_cpu_die(0); + return ret; +} + +static void hv_resume(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + int ret; + + ret = hv_cpu_init(0); + WARN_ON(ret); + + /* Re-enable the hypercall page */ + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 1; + hypercall_msr.guest_physical_address = + vmalloc_to_pfn(hv_hypercall_pg_saved); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + hv_hypercall_pg = hv_hypercall_pg_saved; + hv_hypercall_pg_saved = NULL; + + /* + * Reenlightenment notifications are disabled by hv_cpu_die(0), + * reenable them here if hv_reenlightenment_cb was previously set. + */ + if (hv_reenlightenment_cb) + set_hv_tscchange_cb(hv_reenlightenment_cb); +} + +/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */ +static struct syscore_ops hv_syscore_ops = { + .suspend = hv_suspend, + .resume = hv_resume, +}; + +static void (* __initdata old_setup_percpu_clockev)(void); + +static void __init hv_stimer_setup_percpu_clockev(void) +{ + /* + * Ignore any errors in setting up stimer clockevents + * as we can run with the LAPIC timer as a fallback. + */ + (void)hv_stimer_alloc(false); + + /* + * Still register the LAPIC timer, because the direct-mode STIMER is + * not supported by old versions of Hyper-V. This also allows users + * to switch to LAPIC timer via /sys, if they want to. + */ + if (old_setup_percpu_clockev) + old_setup_percpu_clockev(); +} + +static void __init hv_get_partition_id(void) +{ + struct hv_get_partition_id *output_page; + u64 status; + unsigned long flags; + + local_irq_save(flags); + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); + if (!hv_result_success(status)) { + /* No point in proceeding if this failed */ + pr_err("Failed to get partition ID: %lld\n", status); + BUG(); + } + hv_current_partition_id = output_page->partition_id; + local_irq_restore(flags); +} + +/* + * This function is to be invoked early in the boot sequence after the + * hypervisor has been detected. + * + * 1. Setup the hypercall page. + * 2. Register Hyper-V specific clocksource. + * 3. Setup Hyper-V specific APIC entry points. + */ +void __init hyperv_init(void) +{ + u64 guest_id; + union hv_x64_msr_hypercall_contents hypercall_msr; + int cpuhp; + + if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return; + + if (hv_common_init()) + return; + + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), GFP_KERNEL); + if (!hv_vp_assist_page) { + ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; + goto common_free; + } + + if (hv_isolation_type_snp()) { + /* Negotiate GHCB Version. */ + if (!hv_ghcb_negotiate_protocol()) + hv_ghcb_terminate(SEV_TERM_SET_GEN, + GHCB_SEV_ES_PROT_UNSUPPORTED); + + hv_ghcb_pg = alloc_percpu(union hv_ghcb *); + if (!hv_ghcb_pg) + goto free_vp_assist_page; + } + + cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + hv_cpu_init, hv_cpu_die); + if (cpuhp < 0) + goto free_ghcb_page; + + /* + * Setup the hypercall page and enable hypercalls. + * 1. Register the guest ID + * 2. Enable the hypercall and register the hypercall page + */ + guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + + /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); + if (hv_hypercall_pg == NULL) + goto clean_guest_os_id; + + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 1; + + if (hv_root_partition) { + struct page *pg; + void *src; + + /* + * For the root partition, the hypervisor will set up its + * hypercall page. The hypervisor guarantees it will not show + * up in the root's address space. The root can't change the + * location of the hypercall page. + * + * Order is important here. We must enable the hypercall page + * so it is populated with code, then copy the code to an + * executable page. + */ + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + pg = vmalloc_to_page(hv_hypercall_pg); + src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, + MEMREMAP_WB); + BUG_ON(!src); + memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); + memunmap(src); + } else { + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + } + + /* + * Some versions of Hyper-V that provide IBT in guest VMs have a bug + * in that there's no ENDBR64 instruction at the entry to the + * hypercall page. Because hypercalls are invoked via an indirect call + * to the hypercall page, all hypercall attempts fail when IBT is + * enabled, and Linux panics. For such buggy versions, disable IBT. + * + * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall + * page, so if future Linux kernel versions enable IBT for 32-bit + * builds, additional hypercall page hackery will be required here + * to provide an ENDBR32. + */ +#ifdef CONFIG_X86_KERNEL_IBT + if (cpu_feature_enabled(X86_FEATURE_IBT) && + *(u32 *)hv_hypercall_pg != gen_endbr()) { + setup_clear_cpu_cap(X86_FEATURE_IBT); + pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + } +#endif + + /* + * hyperv_init() is called before LAPIC is initialized: see + * apic_intr_mode_init() -> x86_platform.apic_post_init() and + * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER + * depends on LAPIC, so hv_stimer_alloc() should be called from + * x86_init.timers.setup_percpu_clockev. + */ + old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev; + x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev; + + hv_apic_init(); + + x86_init.pci.arch_init = hv_pci_init; + + register_syscore_ops(&hv_syscore_ops); + + hyperv_init_cpuhp = cpuhp; + + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) + hv_get_partition_id(); + + BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); + +#ifdef CONFIG_PCI_MSI + /* + * If we're running as root, we want to create our own PCI MSI domain. + * We can't set this in hv_pci_init because that would be too late. + */ + if (hv_root_partition) + x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; +#endif + + /* Query the VMs extended capability once, so that it can be cached. */ + hv_query_ext_cap(0); + +#ifdef CONFIG_SWIOTLB + /* + * Swiotlb bounce buffer needs to be mapped in extra address + * space. Map function doesn't work in the early place and so + * call swiotlb_update_mem_attributes() here. + */ + if (hv_is_isolation_supported()) + swiotlb_update_mem_attributes(); +#endif + + return; + +clean_guest_os_id: + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + cpuhp_remove_state(cpuhp); +free_ghcb_page: + free_percpu(hv_ghcb_pg); +free_vp_assist_page: + kfree(hv_vp_assist_page); + hv_vp_assist_page = NULL; +common_free: + hv_common_free(); +} + +/* + * This routine is called before kexec/kdump, it does the required cleanup. + */ +void hyperv_cleanup(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_reference_tsc_msr tsc_msr; + + /* Reset our OS id */ + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + + /* + * Reset hypercall page reference before reset the page, + * let hypercall operations fail safely rather than + * panic the kernel for using invalid hypercall page + */ + hv_hypercall_pg = NULL; + + /* Reset the hypercall page */ + hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); + hypercall_msr.enable = 0; + hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + /* Reset the TSC page */ + tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); + tsc_msr.enable = 0; + hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); +} + +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) +{ + static bool panic_reported; + u64 guest_id; + + if (in_die && !panic_on_oops) + return; + + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + */ + if (panic_reported) + return; + panic_reported = true; + + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + + wrmsrl(HV_X64_MSR_CRASH_P0, err); + wrmsrl(HV_X64_MSR_CRASH_P1, guest_id); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} +EXPORT_SYMBOL_GPL(hyperv_report_panic); + +bool hv_is_hyperv_initialized(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + /* + * Ensure that we're really on Hyper-V, and not a KVM or Xen + * emulation of Hyper-V + */ + if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return false; + + /* + * Verify that earlier initialization succeeded by checking + * that the hypercall page is setup + */ + hypercall_msr.as_uint64 = 0; + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + return hypercall_msr.enable; +} +EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c new file mode 100644 index 000000000..68a0843d4 --- /dev/null +++ b/arch/x86/hyperv/hv_proc.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * See struct hv_deposit_memory. The first u64 is partition ID, the rest + * are GPAs. + */ +#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) + +/* Deposits exact number of pages. Must be called with interrupts enabled. */ +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) +{ + struct page **pages, *page; + int *counts; + int num_allocations; + int i, j, page_count; + int order; + u64 status; + int ret; + u64 base_pfn; + struct hv_deposit_memory *input_page; + unsigned long flags; + + if (num_pages > HV_DEPOSIT_MAX) + return -E2BIG; + if (!num_pages) + return 0; + + /* One buffer for page pointers and counts */ + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + pages = page_address(page); + + counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); + if (!counts) { + free_page((unsigned long)pages); + return -ENOMEM; + } + + /* Allocate all the pages before disabling interrupts */ + i = 0; + + while (num_pages) { + /* Find highest order we can actually allocate */ + order = 31 - __builtin_clz(num_pages); + + while (1) { + pages[i] = alloc_pages_node(node, GFP_KERNEL, order); + if (pages[i]) + break; + if (!order) { + ret = -ENOMEM; + num_allocations = i; + goto err_free_allocations; + } + --order; + } + + split_page(pages[i], order); + counts[i] = 1 << order; + num_pages -= counts[i]; + i++; + } + num_allocations = i; + + local_irq_save(flags); + + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input_page->partition_id = partition_id; + + /* Populate gpa_page_list - these will fit on the input page */ + for (i = 0, page_count = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j, ++page_count) + input_page->gpa_page_list[page_count] = base_pfn + j; + } + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, + page_count, 0, input_page, NULL); + local_irq_restore(flags); + if (!hv_result_success(status)) { + pr_err("Failed to deposit pages: %lld\n", status); + ret = hv_result(status); + goto err_free_allocations; + } + + ret = 0; + goto free_buf; + +err_free_allocations: + for (i = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j) + __free_page(pfn_to_page(base_pfn + j)); + } + +free_buf: + free_page((unsigned long)pages); + kfree(counts); + return ret; +} + +int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) +{ + struct hv_add_logical_processor_in *input; + struct hv_add_logical_processor_out *output; + u64 status; + unsigned long flags; + int ret = HV_STATUS_SUCCESS; + int pxm = node_to_pxm(node); + + /* + * When adding a logical processor, the hypervisor may return + * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more + * pages and retry. + */ + do { + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* We don't do anything with the output right now */ + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + input->lp_index = lp_index; + input->apic_id = apic_id; + input->flags = 0; + input->proximity_domain_info.domain_id = pxm; + input->proximity_domain_info.flags.reserved = 0; + input->proximity_domain_info.flags.proximity_info_valid = 1; + input->proximity_domain_info.flags.proximity_preferred = 1; + status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, + input, output); + local_irq_restore(flags); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { + pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, + lp_index, apic_id, status); + ret = hv_result(status); + } + break; + } + ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); + } while (!ret); + + return ret; +} + +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) +{ + struct hv_create_vp *input; + u64 status; + unsigned long irq_flags; + int ret = HV_STATUS_SUCCESS; + int pxm = node_to_pxm(node); + + /* Root VPs don't seem to need pages deposited */ + if (partition_id != hv_current_partition_id) { + /* The value 90 is empirically determined. It may change. */ + ret = hv_call_deposit_pages(node, partition_id, 90); + if (ret) + return ret; + } + + do { + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input->partition_id = partition_id; + input->vp_index = vp_index; + input->flags = flags; + input->subnode_type = HvSubnodeAny; + if (node != NUMA_NO_NODE) { + input->proximity_domain_info.domain_id = pxm; + input->proximity_domain_info.flags.reserved = 0; + input->proximity_domain_info.flags.proximity_info_valid = 1; + input->proximity_domain_info.flags.proximity_preferred = 1; + } else { + input->proximity_domain_info.as_uint64 = 0; + } + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); + local_irq_restore(irq_flags); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { + pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, + vp_index, flags, status); + ret = hv_result(status); + } + break; + } + ret = hv_call_deposit_pages(node, partition_id, 1); + + } while (!ret); + + return ret; +} + diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c new file mode 100644 index 000000000..91cfe698b --- /dev/null +++ b/arch/x86/hyperv/hv_spinlock.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific spinlock code. + * + * Copyright (C) 2018, Intel, Inc. + * + * Author : Yi Sun + */ + +#define pr_fmt(fmt) "Hyper-V: " fmt + +#include + +#include +#include +#include + +static bool __initdata hv_pvspin = true; + +static void hv_qlock_kick(int cpu) +{ + apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR); +} + +static void hv_qlock_wait(u8 *byte, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + /* + * Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the + * vCPU can be put into 'idle' state. This 'idle' state is + * terminated by an IPI, usually from hv_qlock_kick(), even if + * interrupts are disabled on the vCPU. + * + * To prevent a race against the unlock path it is required to + * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE + * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between + * the lock value check and the rdmsrl() then the vCPU might be put + * into 'idle' state by the hypervisor and kept in that state for + * an unspecified amount of time. + */ + local_irq_save(flags); + /* + * Only issue the rdmsrl() when the lock state has not changed. + */ + if (READ_ONCE(*byte) == val) { + unsigned long msr_val; + + rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val); + + (void)msr_val; + } + local_irq_restore(flags); +} + +/* + * Hyper-V does not support this so far. + */ +__visible bool hv_vcpu_is_preempted(int vcpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted); + +void __init hv_init_spinlocks(void) +{ + if (!hv_pvspin || !apic || + !(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) || + !(ms_hyperv.features & HV_MSR_GUEST_IDLE_AVAILABLE)) { + pr_info("PV spinlocks disabled\n"); + return; + } + pr_info("PV spinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = hv_qlock_wait; + pv_ops.lock.kick = hv_qlock_kick; + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); +} + +static __init int hv_parse_nopvspin(char *arg) +{ + hv_pvspin = false; + return 0; +} +early_param("hv_nopvspin", hv_parse_nopvspin); diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c new file mode 100644 index 000000000..42c70d28e --- /dev/null +++ b/arch/x86/hyperv/irqdomain.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. + * + * Authors: + * Sunil Muthuswamy + * Wei Liu + */ + +#include +#include +#include + +static int hv_map_interrupt(union hv_device_id device_id, bool level, + int cpu, int vector, struct hv_interrupt_entry *entry) +{ + struct hv_input_map_device_interrupt *input; + struct hv_output_map_device_interrupt *output; + struct hv_device_interrupt_descriptor *intr_desc; + unsigned long flags; + u64 status; + int nr_bank, var_size; + + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + intr_desc = &input->interrupt_descriptor; + memset(input, 0, sizeof(*input)); + input->partition_id = hv_current_partition_id; + input->device_id = device_id.as_uint64; + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; + intr_desc->vector_count = 1; + intr_desc->target.vector = vector; + + if (level) + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; + else + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; + + intr_desc->target.vp_set.valid_bank_mask = 0; + intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); + if (nr_bank < 0) { + local_irq_restore(flags); + pr_err("%s: unable to generate VP set\n", __func__); + return EINVAL; + } + intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; + + /* + * var-sized hypercall, var-size starts after vp_mask (thus + * vp_set.format does not count, but vp_set.valid_bank_mask + * does). + */ + var_size = nr_bank + 1; + + status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, + input, output); + *entry = output->interrupt_entry; + + local_irq_restore(flags); + + if (!hv_result_success(status)) + pr_err("%s: hypercall failed, status %lld\n", __func__, status); + + return hv_result(status); +} + +static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) +{ + unsigned long flags; + struct hv_input_unmap_device_interrupt *input; + struct hv_interrupt_entry *intr_entry; + u64 status; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + memset(input, 0, sizeof(*input)); + intr_entry = &input->interrupt_entry; + input->partition_id = hv_current_partition_id; + input->device_id = id; + *intr_entry = *old_entry; + + status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); + local_irq_restore(flags); + + return hv_result(status); +} + +#ifdef CONFIG_PCI_MSI +struct rid_data { + struct pci_dev *bridge; + u32 rid; +}; + +static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + struct rid_data *rd = data; + u8 bus = PCI_BUS_NUM(rd->rid); + + if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { + rd->bridge = pdev; + rd->rid = alias; + } + + return 0; +} + +static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) +{ + union hv_device_id dev_id; + struct rid_data data = { + .bridge = NULL, + .rid = PCI_DEVID(dev->bus->number, dev->devfn) + }; + + pci_for_each_dma_alias(dev, get_rid_cb, &data); + + dev_id.as_uint64 = 0; + dev_id.device_type = HV_DEVICE_TYPE_PCI; + dev_id.pci.segment = pci_domain_nr(dev->bus); + + dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid); + dev_id.pci.bdf.device = PCI_SLOT(data.rid); + dev_id.pci.bdf.function = PCI_FUNC(data.rid); + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; + + if (data.bridge) { + int pos; + + /* + * Microsoft Hypervisor requires a bus range when the bridge is + * running in PCI-X mode. + * + * To distinguish conventional vs PCI-X bridge, we can check + * the bridge's PCI-X Secondary Status Register, Secondary Bus + * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge + * Specification Revision 1.0 5.2.2.1.3. + * + * Value zero means it is in conventional mode, otherwise it is + * in PCI-X mode. + */ + + pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); + if (pos) { + u16 status; + + pci_read_config_word(data.bridge, pos + + PCI_X_BRIDGE_SSTATUS, &status); + + if (status & PCI_X_SSTATUS_FREQ) { + /* Non-zero, PCI-X mode */ + u8 sec_bus, sub_bus; + + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; + + pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); + dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; + pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); + dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; + } + } + } + + return dev_id; +} + +static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, + struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id = hv_build_pci_dev_id(dev); + + return hv_map_interrupt(device_id, false, cpu, vector, entry); +} + +static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) +{ + /* High address is always 0 */ + msg->address_hi = 0; + msg->address_lo = entry->msi_entry.address.as_uint32; + msg->data = entry->msi_entry.data.as_uint32; +} + +static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); +static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *msidesc; + struct pci_dev *dev; + struct hv_interrupt_entry out_entry, *stored_entry; + struct irq_cfg *cfg = irqd_cfg(data); + const cpumask_t *affinity; + int cpu; + u64 status; + + msidesc = irq_data_get_msi_desc(data); + dev = msi_desc_to_pci_dev(msidesc); + + if (!cfg) { + pr_debug("%s: cfg is NULL", __func__); + return; + } + + affinity = irq_data_get_effective_affinity_mask(data); + cpu = cpumask_first_and(affinity, cpu_online_mask); + + if (data->chip_data) { + /* + * This interrupt is already mapped. Let's unmap first. + * + * We don't use retarget interrupt hypercalls here because + * Microsoft Hypervisor doens't allow root to change the vector + * or specify VPs outside of the set that is initially used + * during mapping. + */ + stored_entry = data->chip_data; + data->chip_data = NULL; + + status = hv_unmap_msi_interrupt(dev, stored_entry); + + kfree(stored_entry); + + if (status != HV_STATUS_SUCCESS) { + pr_debug("%s: failed to unmap, status %lld", __func__, status); + return; + } + } + + stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); + if (!stored_entry) { + pr_debug("%s: failed to allocate chip data\n", __func__); + return; + } + + status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); + if (status != HV_STATUS_SUCCESS) { + kfree(stored_entry); + return; + } + + *stored_entry = out_entry; + data->chip_data = stored_entry; + entry_to_msi_msg(&out_entry, msg); + + return; +} + +static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) +{ + return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); +} + +static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) +{ + struct hv_interrupt_entry old_entry; + struct msi_msg msg; + u64 status; + + if (!irqd->chip_data) { + pr_debug("%s: no chip data\n!", __func__); + return; + } + + old_entry = *(struct hv_interrupt_entry *)irqd->chip_data; + entry_to_msi_msg(&old_entry, &msg); + + kfree(irqd->chip_data); + irqd->chip_data = NULL; + + status = hv_unmap_msi_interrupt(dev, &old_entry); + + if (status != HV_STATUS_SUCCESS) + pr_err("%s: hypercall failed, status %lld\n", __func__, status); +} + +static void hv_msi_free_irq(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + struct irq_data *irqd = irq_get_irq_data(virq); + struct msi_desc *desc; + + if (!irqd) + return; + + desc = irq_data_get_msi_desc(irqd); + if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) + return; + + hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); +} + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip hv_pci_msi_controller = { + .name = "HV-PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = hv_irq_compose_msi_msg, + .irq_set_affinity = msi_domain_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static struct msi_domain_ops pci_msi_domain_ops = { + .msi_free = hv_msi_free_irq, + .msi_prepare = pci_msi_prepare, +}; + +static struct msi_domain_info hv_pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &hv_pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; + +struct irq_domain * __init hv_create_pci_msi_domain(void) +{ + struct irq_domain *d = NULL; + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); + if (fn) + d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); + + /* No point in going further if we can't get an irq domain */ + BUG_ON(!d); + + return d; +} + +#endif /* CONFIG_PCI_MSI */ + +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id; + + device_id.as_uint64 = 0; + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; + device_id.ioapic.ioapic_id = (u8)ioapic_id; + + return hv_unmap_interrupt(device_id.as_uint64, entry); +} +EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); + +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, + struct hv_interrupt_entry *entry) +{ + union hv_device_id device_id; + + device_id.as_uint64 = 0; + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; + device_id.ioapic.ioapic_id = (u8)ioapic_id; + + return hv_map_interrupt(device_id, level, cpu, vector, entry); +} +EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c new file mode 100644 index 000000000..1dbcbd9da --- /dev/null +++ b/arch/x86/hyperv/ivm.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V Isolation VM interface with paravisor and hypervisor + * + * Author: + * Tianyu Lan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +#define GHCB_USAGE_HYPERV_CALL 1 + +union hv_ghcb { + struct ghcb ghcb; + struct { + u64 hypercalldata[509]; + u64 outputgpa; + union { + union { + struct { + u32 callcode : 16; + u32 isfast : 1; + u32 reserved1 : 14; + u32 isnested : 1; + u32 countofelements : 12; + u32 reserved2 : 4; + u32 repstartindex : 12; + u32 reserved3 : 4; + }; + u64 asuint64; + } hypercallinput; + union { + struct { + u16 callstatus; + u16 reserved1; + u32 elementsprocessed : 12; + u32 reserved2 : 20; + }; + u64 asunit64; + } hypercalloutput; + }; + u64 reserved2; + } hypercall; +} __packed __aligned(HV_HYP_PAGE_SIZE); + +static u16 hv_ghcb_version __ro_after_init; + +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) +{ + union hv_ghcb *hv_ghcb; + void **ghcb_base; + unsigned long flags; + u64 status; + + if (!hv_ghcb_pg) + return -EFAULT; + + WARN_ON(in_nmi()); + + local_irq_save(flags); + ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); + hv_ghcb = (union hv_ghcb *)*ghcb_base; + if (!hv_ghcb) { + local_irq_restore(flags); + return -EFAULT; + } + + hv_ghcb->ghcb.protocol_version = GHCB_PROTOCOL_MAX; + hv_ghcb->ghcb.ghcb_usage = GHCB_USAGE_HYPERV_CALL; + + hv_ghcb->hypercall.outputgpa = (u64)output; + hv_ghcb->hypercall.hypercallinput.asuint64 = 0; + hv_ghcb->hypercall.hypercallinput.callcode = control; + + if (input_size) + memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size); + + VMGEXIT(); + + hv_ghcb->ghcb.ghcb_usage = 0xffffffff; + memset(hv_ghcb->ghcb.save.valid_bitmap, 0, + sizeof(hv_ghcb->ghcb.save.valid_bitmap)); + + status = hv_ghcb->hypercall.hypercalloutput.callstatus; + + local_irq_restore(flags); + + return status; +} + +static inline u64 rd_ghcb_msr(void) +{ + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); +} + +static inline void wr_ghcb_msr(u64 val) +{ + native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val); +} + +static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, + u64 exit_info_1, u64 exit_info_2) +{ + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = hv_ghcb_version; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + VMGEXIT(); + + if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0)) + return ES_VMM_ERROR; + else + return ES_OK; +} + +void hv_ghcb_terminate(unsigned int set, unsigned int reason) +{ + u64 val = GHCB_MSR_TERM_REQ; + + /* Tell the hypervisor what went wrong. */ + val |= GHCB_SEV_TERM_REASON(set, reason); + + /* Request Guest Termination from Hypvervisor */ + wr_ghcb_msr(val); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + +bool hv_ghcb_negotiate_protocol(void) +{ + u64 ghcb_gpa; + u64 val; + + /* Save ghcb page gpa. */ + ghcb_gpa = rd_ghcb_msr(); + + /* Do the GHCB protocol version negotiation */ + wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); + VMGEXIT(); + val = rd_ghcb_msr(); + + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) + return false; + + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) + return false; + + hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), + GHCB_PROTOCOL_MAX); + + /* Write ghcb page back after negotiating protocol. */ + wr_ghcb_msr(ghcb_gpa); + VMGEXIT(); + + return true; +} + +void hv_ghcb_msr_write(u64 msr, u64 value) +{ + union hv_ghcb *hv_ghcb; + void **ghcb_base; + unsigned long flags; + + if (!hv_ghcb_pg) + return; + + WARN_ON(in_nmi()); + + local_irq_save(flags); + ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); + hv_ghcb = (union hv_ghcb *)*ghcb_base; + if (!hv_ghcb) { + local_irq_restore(flags); + return; + } + + ghcb_set_rcx(&hv_ghcb->ghcb, msr); + ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value)); + ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value)); + + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0)) + pr_warn("Fail to write msr via ghcb %llx.\n", msr); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(hv_ghcb_msr_write); + +void hv_ghcb_msr_read(u64 msr, u64 *value) +{ + union hv_ghcb *hv_ghcb; + void **ghcb_base; + unsigned long flags; + + /* Check size of union hv_ghcb here. */ + BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE); + + if (!hv_ghcb_pg) + return; + + WARN_ON(in_nmi()); + + local_irq_save(flags); + ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); + hv_ghcb = (union hv_ghcb *)*ghcb_base; + if (!hv_ghcb) { + local_irq_restore(flags); + return; + } + + ghcb_set_rcx(&hv_ghcb->ghcb, msr); + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0)) + pr_warn("Fail to read msr via ghcb %llx.\n", msr); + else + *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax) + | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); +#endif + +enum hv_isolation_type hv_get_isolation_type(void) +{ + if (!(ms_hyperv.priv_high & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); +} +EXPORT_SYMBOL_GPL(hv_get_isolation_type); + +/* + * hv_is_isolation_supported - Check system runs in the Hyper-V + * isolation VM. + */ +bool hv_is_isolation_supported(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return false; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) + return false; + + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; +} + +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); + +/* + * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * isolation VM. + */ +bool hv_isolation_type_snp(void) +{ + return static_branch_unlikely(&isolation_type_snp); +} + +/* + * hv_mark_gpa_visibility - Set pages visible to host via hvcall. + * + * In Isolation VM, all guest memory is encrypted from host and guest + * needs to set memory visible to host via hvcall before sharing memory + * with host. + */ +static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], + enum hv_mem_host_visibility visibility) +{ + struct hv_gpa_range_for_visibility **input_pcpu, *input; + u16 pages_processed; + u64 hv_status; + unsigned long flags; + + /* no-op if partition isolation is not enabled */ + if (!hv_is_isolation_supported()) + return 0; + + if (count > HV_MAX_MODIFY_GPA_REP_COUNT) { + pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count, + HV_MAX_MODIFY_GPA_REP_COUNT); + return -EINVAL; + } + + local_irq_save(flags); + input_pcpu = (struct hv_gpa_range_for_visibility **) + this_cpu_ptr(hyperv_pcpu_input_arg); + input = *input_pcpu; + if (unlikely(!input)) { + local_irq_restore(flags); + return -EINVAL; + } + + input->partition_id = HV_PARTITION_ID_SELF; + input->host_visibility = visibility; + input->reserved0 = 0; + input->reserved1 = 0; + memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); + hv_status = hv_do_rep_hypercall( + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, + 0, input, &pages_processed); + local_irq_restore(flags); + + if (hv_result_success(hv_status)) + return 0; + else + return -EFAULT; +} + +/* + * hv_set_mem_host_visibility - Set specified memory visible to host. + * + * In Isolation VM, all guest memory is encrypted from host and guest + * needs to set memory visible to host via hvcall before sharing memory + * with host. This function works as wrap of hv_mark_gpa_visibility() + * with memory base and size. + */ +int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visible) +{ + enum hv_mem_host_visibility visibility = visible ? + VMBUS_PAGE_VISIBLE_READ_WRITE : VMBUS_PAGE_NOT_VISIBLE; + u64 *pfn_array; + int ret = 0; + int i, pfn; + + if (!hv_is_isolation_supported() || !hv_hypercall_pg) + return 0; + + pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); + if (!pfn_array) + return -ENOMEM; + + for (i = 0, pfn = 0; i < pagecount; i++) { + pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); + pfn++; + + if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { + ret = hv_mark_gpa_visibility(pfn, pfn_array, + visibility); + if (ret) + goto err_free_pfn_array; + pfn = 0; + } + } + + err_free_pfn_array: + kfree(pfn_array); + return ret; +} + +/* + * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM. + */ +void *hv_map_memory(void *addr, unsigned long size) +{ + unsigned long *pfns = kcalloc(size / PAGE_SIZE, + sizeof(unsigned long), GFP_KERNEL); + void *vaddr; + int i; + + if (!pfns) + return NULL; + + for (i = 0; i < size / PAGE_SIZE; i++) + pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) + + (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT); + + vaddr = vmap_pfn(pfns, size / PAGE_SIZE, PAGE_KERNEL_IO); + kfree(pfns); + + return vaddr; +} + +void hv_unmap_memory(void *addr) +{ + vunmap(addr); +} diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c new file mode 100644 index 000000000..0ad2378fe --- /dev/null +++ b/arch/x86/hyperv/mmu.c @@ -0,0 +1,243 @@ +#define pr_fmt(fmt) "Hyper-V: " fmt + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +/* Each gva in gva_list encodes up to 4096 pages to flush */ +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) + +static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, + const struct flush_tlb_info *info); + +/* + * Fills in gva_list starting from offset. Returns the number of items added. + */ +static inline int fill_gva_list(u64 gva_list[], int offset, + unsigned long start, unsigned long end) +{ + int gva_n = offset; + unsigned long cur = start, diff; + + do { + diff = end > cur ? end - cur : 0; + + gva_list[gva_n] = cur & PAGE_MASK; + /* + * Lower 12 bits encode the number of additional + * pages to flush (in addition to the 'cur' page). + */ + if (diff >= HV_TLB_FLUSH_UNIT) { + gva_list[gva_n] |= ~PAGE_MASK; + cur += HV_TLB_FLUSH_UNIT; + } else if (diff) { + gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; + cur = end; + } + + gva_n++; + + } while (cur < end); + + return gva_n - offset; +} + +static void hyperv_flush_tlb_multi(const struct cpumask *cpus, + const struct flush_tlb_info *info) +{ + int cpu, vcpu, gva_n, max_gvas; + struct hv_tlb_flush **flush_pcpu; + struct hv_tlb_flush *flush; + u64 status; + unsigned long flags; + + trace_hyperv_mmu_flush_tlb_multi(cpus, info); + + if (!hv_hypercall_pg) + goto do_native; + + local_irq_save(flags); + + flush_pcpu = (struct hv_tlb_flush **) + this_cpu_ptr(hyperv_pcpu_input_arg); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto do_native; + } + + if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. + */ + flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; + flush->flags = 0; + } else { + flush->address_space = 0; + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + } + + flush->processor_mask = 0; + if (cpumask_equal(cpus, cpu_present_mask)) { + flush->flags |= HV_FLUSH_ALL_PROCESSORS; + } else { + /* + * From the supplied CPU set we need to figure out if we can get + * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} + * hypercalls. This is possible when the highest VP number in + * the set is < 64. As VP numbers are usually in ascending order + * and match Linux CPU ids, here is an optimization: we check + * the VP number for the highest bit in the supplied set first + * so we can quickly find out if using *_EX hypercalls is a + * must. We will also check all VP numbers when walking the + * supplied CPU set to remain correct in all cases. + */ + cpu = cpumask_last(cpus); + + if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64) + goto do_ex_hypercall; + + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + if (vcpu == VP_INVAL) { + local_irq_restore(flags); + goto do_native; + } + + if (vcpu >= 64) + goto do_ex_hypercall; + + __set_bit(vcpu, (unsigned long *) + &flush->processor_mask); + } + + /* nothing to flush if 'processor_mask' ends up being empty */ + if (!flush->processor_mask) { + local_irq_restore(flags); + return; + } + } + + /* + * We can flush not more than max_gvas with one hypercall. Flush the + * whole address space if we were asked to do more. + */ + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); + + if (info->end == TLB_FLUSH_ALL) { + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; + status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, + flush, NULL); + } else if (info->end && + ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { + status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, + flush, NULL); + } else { + gva_n = fill_gva_list(flush->gva_list, 0, + info->start, info->end); + status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, + gva_n, 0, flush, NULL); + } + goto check_status; + +do_ex_hypercall: + status = hyperv_flush_tlb_others_ex(cpus, info); + +check_status: + local_irq_restore(flags); + + if (hv_result_success(status)) + return; +do_native: + native_flush_tlb_multi(cpus, info); +} + +static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, + const struct flush_tlb_info *info) +{ + int nr_bank = 0, max_gvas, gva_n; + struct hv_tlb_flush_ex **flush_pcpu; + struct hv_tlb_flush_ex *flush; + u64 status; + + if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return HV_STATUS_INVALID_PARAMETER; + + flush_pcpu = (struct hv_tlb_flush_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); + + flush = *flush_pcpu; + + if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. + */ + flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; + flush->flags = 0; + } else { + flush->address_space = 0; + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + } + + flush->hv_vp_set.valid_bank_mask = 0; + + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); + if (nr_bank < 0) + return HV_STATUS_INVALID_PARAMETER; + + /* + * We can flush not more than max_gvas with one hypercall. Flush the + * whole address space if we were asked to do more. + */ + max_gvas = + (PAGE_SIZE - sizeof(*flush) - nr_bank * + sizeof(flush->hv_vp_set.bank_contents[0])) / + sizeof(flush->gva_list[0]); + + if (info->end == TLB_FLUSH_ALL) { + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; + status = hv_do_rep_hypercall( + HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + 0, nr_bank, flush, NULL); + } else if (info->end && + ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { + status = hv_do_rep_hypercall( + HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + 0, nr_bank, flush, NULL); + } else { + gva_n = fill_gva_list(flush->gva_list, nr_bank, + info->start, info->end); + status = hv_do_rep_hypercall( + HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, + gva_n, nr_bank, flush, NULL); + } + + return status; +} + +void hyperv_setup_mmu_ops(void) +{ + if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) + return; + + pr_info("Using hypercall for remote TLB flush\n"); + pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; + pv_ops.mmu.tlb_remove_table = tlb_remove_table; +} diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c new file mode 100644 index 000000000..5d70968c8 --- /dev/null +++ b/arch/x86/hyperv/nested.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V nested virtualization code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : Lan Tianyu + */ +#define pr_fmt(fmt) "Hyper-V: " fmt + + +#include +#include +#include +#include + +#include + +int hyperv_flush_guest_mapping(u64 as) +{ + struct hv_guest_mapping_flush **flush_pcpu; + struct hv_guest_mapping_flush *flush; + u64 status; + unsigned long flags; + int ret = -ENOTSUPP; + + if (!hv_hypercall_pg) + goto fault; + + local_irq_save(flags); + + flush_pcpu = (struct hv_guest_mapping_flush **) + this_cpu_ptr(hyperv_pcpu_input_arg); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto fault; + } + + flush->address_space = as; + flush->flags = 0; + + status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE, + flush, NULL); + local_irq_restore(flags); + + if (hv_result_success(status)) + ret = 0; + +fault: + trace_hyperv_nested_flush_guest_mapping(as, ret); + return ret; +} +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping); + +int hyperv_fill_flush_guest_mapping_list( + struct hv_guest_mapping_flush_list *flush, + u64 start_gfn, u64 pages) +{ + u64 cur = start_gfn; + u64 additional_pages; + int gpa_n = 0; + + do { + /* + * If flush requests exceed max flush count, go back to + * flush tlbs without range. + */ + if (gpa_n >= HV_MAX_FLUSH_REP_COUNT) + return -ENOSPC; + + additional_pages = min_t(u64, pages, HV_MAX_FLUSH_PAGES) - 1; + + flush->gpa_list[gpa_n].page.additional_pages = additional_pages; + flush->gpa_list[gpa_n].page.largepage = false; + flush->gpa_list[gpa_n].page.basepfn = cur; + + pages -= additional_pages + 1; + cur += additional_pages + 1; + gpa_n++; + } while (pages > 0); + + return gpa_n; +} +EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list); + +int hyperv_flush_guest_mapping_range(u64 as, + hyperv_fill_flush_list_func fill_flush_list_func, void *data) +{ + struct hv_guest_mapping_flush_list **flush_pcpu; + struct hv_guest_mapping_flush_list *flush; + u64 status; + unsigned long flags; + int ret = -ENOTSUPP; + int gpa_n = 0; + + if (!hv_hypercall_pg || !fill_flush_list_func) + goto fault; + + local_irq_save(flags); + + flush_pcpu = (struct hv_guest_mapping_flush_list **) + this_cpu_ptr(hyperv_pcpu_input_arg); + + flush = *flush_pcpu; + if (unlikely(!flush)) { + local_irq_restore(flags); + goto fault; + } + + flush->address_space = as; + flush->flags = 0; + + gpa_n = fill_flush_list_func(flush, data); + if (gpa_n < 0) { + local_irq_restore(flags); + goto fault; + } + + status = hv_do_rep_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST, + gpa_n, 0, flush, NULL); + + local_irq_restore(flags); + + if (hv_result_success(status)) + ret = 0; + else + ret = hv_result(status); +fault: + trace_hyperv_nested_flush_guest_mapping_range(as, ret); + return ret; +} +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping_range); diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile new file mode 100644 index 000000000..e48105669 --- /dev/null +++ b/arch/x86/ia32/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the ia32 kernel emulation subsystem. +# + +obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o + +audit-class-$(CONFIG_AUDIT) := audit.o +obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c new file mode 100644 index 000000000..59e19549e --- /dev/null +++ b/arch/x86/ia32/audit.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +unsigned ia32_dir_class[] = { +#include +~0U +}; + +unsigned ia32_chattr_class[] = { +#include +~0U +}; + +unsigned ia32_write_class[] = { +#include +~0U +}; + +unsigned ia32_read_class[] = { +#include +~0U +}; + +unsigned ia32_signal_class[] = { +#include +~0U +}; + +int ia32_classify_syscall(unsigned syscall) +{ + switch (syscall) { + case __NR_open: + return AUDITSC_OPEN; + case __NR_openat: + return AUDITSC_OPENAT; + case __NR_socketcall: + return AUDITSC_SOCKETCALL; + case __NR_execve: + case __NR_execveat: + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; + default: + return AUDITSC_COMPAT; + } +} diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c new file mode 100644 index 000000000..c9c385932 --- /dev/null +++ b/arch/x86/ia32/ia32_signal.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/x86_64/ia32/ia32_signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void reload_segments(struct sigcontext_32 *sc) +{ + unsigned int cur; + + savesegment(gs, cur); + if ((sc->gs | 0x03) != cur) + load_gs_index(sc->gs | 0x03); + savesegment(fs, cur); + if ((sc->fs | 0x03) != cur) + loadsegment(fs, sc->fs | 0x03); + savesegment(ds, cur); + if ((sc->ds | 0x03) != cur) + loadsegment(ds, sc->ds | 0x03); + savesegment(es, cur); + if ((sc->es | 0x03) != cur) + loadsegment(es, sc->es | 0x03); +} + +/* + * Do a signal return; undo the signal stack. + */ +static bool ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_32 __user *usc) +{ + struct sigcontext_32 sc; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + if (unlikely(copy_from_user(&sc, usc, sizeof(sc)))) + return false; + + /* Get only the ia32 registers. */ + regs->bx = sc.bx; + regs->cx = sc.cx; + regs->dx = sc.dx; + regs->si = sc.si; + regs->di = sc.di; + regs->bp = sc.bp; + regs->ax = sc.ax; + regs->sp = sc.sp; + regs->ip = sc.ip; + + /* Get CS/SS and force CPL3 */ + regs->cs = sc.cs | 0x03; + regs->ss = sc.ss | 0x03; + + regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS); + /* disable syscall checks */ + regs->orig_ax = -1; + + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + reload_segments(&sc); + return fpu__restore_sig(compat_ptr(sc.fpstate), 1); +} + +COMPAT_SYSCALL_DEFINE0(sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); + sigset_t set; + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || __get_user(((__u32 *)&set)[1], &frame->extramask[0])) + goto badframe; + + set_current_blocked(&set); + + if (!ia32_restore_sigcontext(regs, &frame->sc)) + goto badframe; + return regs->ax; + +badframe: + signal_fault(regs, frame, "32bit sigreturn"); + return 0; +} + +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct rt_sigframe_ia32 __user *frame; + sigset_t set; + + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask)) + goto badframe; + + set_current_blocked(&set); + + if (!ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->ax; + +badframe: + signal_fault(regs, frame, "32bit rt sigreturn"); + return 0; +} + +/* + * Set up a signal frame. + */ + +#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; }) + +static __always_inline int +__unsafe_setup_sigcontext32(struct sigcontext_32 __user *sc, + void __user *fpstate, + struct pt_regs *regs, unsigned int mask) +{ + unsafe_put_user(get_user_seg(gs), (unsigned int __user *)&sc->gs, Efault); + unsafe_put_user(get_user_seg(fs), (unsigned int __user *)&sc->fs, Efault); + unsafe_put_user(get_user_seg(ds), (unsigned int __user *)&sc->ds, Efault); + unsafe_put_user(get_user_seg(es), (unsigned int __user *)&sc->es, Efault); + + unsafe_put_user(regs->di, &sc->di, Efault); + unsafe_put_user(regs->si, &sc->si, Efault); + unsafe_put_user(regs->bp, &sc->bp, Efault); + unsafe_put_user(regs->sp, &sc->sp, Efault); + unsafe_put_user(regs->bx, &sc->bx, Efault); + unsafe_put_user(regs->dx, &sc->dx, Efault); + unsafe_put_user(regs->cx, &sc->cx, Efault); + unsafe_put_user(regs->ax, &sc->ax, Efault); + unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault); + unsafe_put_user(current->thread.error_code, &sc->err, Efault); + unsafe_put_user(regs->ip, &sc->ip, Efault); + unsafe_put_user(regs->cs, (unsigned int __user *)&sc->cs, Efault); + unsafe_put_user(regs->flags, &sc->flags, Efault); + unsafe_put_user(regs->sp, &sc->sp_at_signal, Efault); + unsafe_put_user(regs->ss, (unsigned int __user *)&sc->ss, Efault); + + unsafe_put_user(ptr_to_compat(fpstate), &sc->fpstate, Efault); + + /* non-iBCS2 extensions.. */ + unsafe_put_user(mask, &sc->oldmask, Efault); + unsafe_put_user(current->thread.cr2, &sc->cr2, Efault); + return 0; + +Efault: + return -EFAULT; +} + +#define unsafe_put_sigcontext32(sc, fp, regs, set, label) \ +do { \ + if (__unsafe_setup_sigcontext32(sc, fp, regs, set->sig[0])) \ + goto label; \ +} while(0) + +/* + * Determine which stack to use.. + */ +static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + size_t frame_size, + void __user **fpstate) +{ + unsigned long sp, fx_aligned, math_size; + + /* Default to using normal stack */ + sp = regs->sp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ksig->ka.sa.sa_flags & SA_ONSTACK) + sp = sigsp(sp, ksig); + /* This is the legacy signal stack switching. */ + else if (regs->ss != __USER32_DS && + !(ksig->ka.sa.sa_flags & SA_RESTORER) && + ksig->ka.sa.sa_restorer) + sp = (unsigned long) ksig->ka.sa.sa_restorer; + + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_32 __user *) sp; + if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, + math_size)) + return (void __user *) -1L; + + sp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + sp = ((sp + 4) & -16ul) - 4; + return (void __user *) sp; +} + +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs) +{ + struct sigframe_ia32 __user *frame; + void __user *restorer; + void __user *fp = NULL; + + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + }; + + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); + + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + restorer = ksig->ka.sa.sa_restorer; + } else { + /* Return stub is in 32bit vsyscall page */ + if (current->mm->context.vdso) + restorer = current->mm->context.vdso + + vdso_image_32.sym___kernel_sigreturn; + else + restorer = &frame->retcode; + } + + if (!user_access_begin(frame, sizeof(*frame))) + return -EFAULT; + + unsafe_put_user(sig, &frame->sig, Efault); + unsafe_put_sigcontext32(&frame->sc, fp, regs, set, Efault); + unsafe_put_user(set->sig[1], &frame->extramask[0], Efault); + unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault); + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. + */ + unsafe_put_user(*((u64 *)&code), (u64 __user *)frame->retcode, Efault); + user_access_end(); + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; + + /* Make -mregparm=3 work */ + regs->ax = sig; + regs->dx = 0; + regs->cx = 0; + + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + return 0; +Efault: + user_access_end(); + return -EFAULT; +} + +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe_ia32 __user *frame; + void __user *restorer; + void __user *fp = NULL; + + /* unsafe_put_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u8 pad; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); + + if (!user_access_begin(frame, sizeof(*frame))) + return -EFAULT; + + unsafe_put_user(sig, &frame->sig, Efault); + unsafe_put_user(ptr_to_compat(&frame->info), &frame->pinfo, Efault); + unsafe_put_user(ptr_to_compat(&frame->uc), &frame->puc, Efault); + + /* Create the ucontext. */ + if (static_cpu_has(X86_FEATURE_XSAVE)) + unsafe_put_user(UC_FP_XSTATE, &frame->uc.uc_flags, Efault); + else + unsafe_put_user(0, &frame->uc.uc_flags, Efault); + unsafe_put_user(0, &frame->uc.uc_link, Efault); + unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault); + + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; + else + restorer = current->mm->context.vdso + + vdso_image_32.sym___kernel_rt_sigreturn; + unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + unsafe_put_user(*((u64 *)&code), (u64 __user *)frame->retcode, Efault); + unsafe_put_sigcontext32(&frame->uc.uc_mcontext, fp, regs, set, Efault); + unsafe_put_user(*(__u64 *)set, (__u64 __user *)&frame->uc.uc_sigmask, Efault); + user_access_end(); + + if (__copy_siginfo_to_user32(&frame->info, &ksig->info)) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; + + /* Make -mregparm=3 work */ + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + return 0; +Efault: + user_access_end(); + return -EFAULT; +} diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000..07949102a --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rcx) +GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) +GEN(rsi) +GEN(rdi) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ecx) +GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) +GEN(esi) +GEN(edi) +#endif diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild new file mode 100644 index 000000000..1e51650b7 --- /dev/null +++ b/arch/x86/include/asm/Kbuild @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + + +generated-y += syscalls_32.h +generated-y += syscalls_64.h +generated-y += syscalls_x32.h +generated-y += unistd_32_ia32.h +generated-y += unistd_64_x32.h +generated-y += xen-hypercalls.h + +generic-y += early_ioremap.h +generic-y += export.h +generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h new file mode 100644 index 000000000..d937c55e7 --- /dev/null +++ b/arch/x86/include/asm/acenv.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * X86 specific ACPICA environments and implementation + * + * Copyright (C) 2014, Intel Corporation + * Author: Lv Zheng + */ + +#ifndef _ASM_X86_ACENV_H +#define _ASM_X86_ACENV_H + +#include + +/* Asm macros */ + +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + : "=a"(q32), "=d"(r32) \ + : "r"(d32), \ + "0"(n_lo), "1"(n_hi)) + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2 ;" \ + "rcrl $1,%3;" \ + : "=r"(n_hi), "=r"(n_lo) \ + : "0"(n_hi), "1"(n_lo)) + +#endif /* _ASM_X86_ACENV_H */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h new file mode 100644 index 000000000..8eb74cf38 --- /dev/null +++ b/arch/x86/include/asm/acpi.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_ACPI_H +#define _ASM_X86_ACPI_H + +/* + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2001 Patrick Mochel + */ +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_ACPI_APEI +# include +#endif + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; +extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; +extern int acpi_fix_pin2_polarity; +extern int acpi_disable_cmcff; + +extern u8 acpi_sci_flags; +extern u32 acpi_sci_override_gsi; +void acpi_pic_sci_set_trigger(unsigned int, u16); + +struct device; + +extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity); +extern void (*__acpi_unregister_gsi)(u32 gsi); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} + +/* Low-level suspend routine. */ +extern int (*acpi_suspend_lowlevel)(void); + +/* Physical address to resume after wakeup */ +unsigned long acpi_get_wakeup_address(void); + +static inline bool acpi_skip_set_wakeup_address(void) +{ + return cpu_feature_enabled(X86_FEATURE_XENPV); +} + +#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address + +/* + * Check if the CPU can handle C2 and deeper + */ +static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) +{ + /* + * Early models (<=5) of AMD Opterons are not supposed to go into + * C2 state. + * + * Steppings 0x0A and later are good + */ + if (boot_cpu_data.x86 == 0x0F && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86_model <= 0x05 && + boot_cpu_data.x86_stepping < 0x0A) + return 1; + else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) + return 1; + else + return max_cstate; +} + +static inline bool arch_has_acpi_pdc(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + return (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR); +} + +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + +static inline bool acpi_has_cpu_in_madt(void) +{ + return !!acpi_lapic; +} + +#define ACPI_HAVE_ARCH_SET_ROOT_POINTER +static inline void acpi_arch_set_root_pointer(u64 addr) +{ + x86_init.acpi.set_root_pointer(addr); +} + +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + +void acpi_generic_reduced_hw_init(void); + +void x86_default_set_root_pointer(u64 addr); +u64 x86_default_get_root_pointer(void); + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 +#define acpi_disable_cmcff 0 +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +static inline void acpi_generic_reduced_hw_init(void) { } + +static inline void x86_default_set_root_pointer(u64 addr) { } + +static inline u64 x86_default_get_root_pointer(void) +{ + return 0; +} + +#endif /* !CONFIG_ACPI */ + +#define ARCH_HAS_POWER_INIT 1 + +#ifdef CONFIG_ACPI_NUMA +extern int x86_acpi_numa_init(void); +#endif /* CONFIG_ACPI_NUMA */ + +struct cper_ia_proc_ctx; + +#ifdef CONFIG_ACPI_APEI +static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +{ + /* + * We currently have no way to look up the EFI memory map + * attributes for a region in a consistent way, because the + * memmap is discarded after efi_free_boot_services(). So if + * you call efi_mem_attributes() during boot and at runtime, + * you could theoretically see different attributes. + * + * We are yet to see any x86 platforms that require anything + * other than PAGE_KERNEL (some ARM64 platforms require the + * equivalent of PAGE_KERNEL_NOCACHE). Additionally, if SME + * is active, the ACPI information will not be encrypted, + * so return PAGE_KERNEL_NOENC until we know differently. + */ + return PAGE_KERNEL_NOENC; +} + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) +{ + return -EINVAL; +} +#endif + +#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) + +#endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h new file mode 100644 index 000000000..1dd14381b --- /dev/null +++ b/arch/x86/include/asm/acrn.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ACRN_H +#define _ASM_X86_ACRN_H + +/* + * This CPUID returns feature bitmaps in EAX. + * Guest VM uses this to detect the appropriate feature bit. + */ +#define ACRN_CPUID_FEATURES 0x40000001 +/* Bit 0 indicates whether guest VM is privileged */ +#define ACRN_FEATURE_PRIVILEGED_VM BIT(0) + +/* + * Timing Information. + * This leaf returns the current TSC frequency in kHz. + * + * EAX: (Virtual) TSC frequency in kHz. + * EBX, ECX, EDX: RESERVED (reserved fields are set to zero). + */ +#define ACRN_CPUID_TIMING_INFO 0x40000010 + +void acrn_setup_intr_handler(void (*handler)(void)); +void acrn_remove_intr_handler(void); + +static inline u32 acrn_cpuid_base(void) +{ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return hypervisor_cpuid_base("ACRNACRNACRN", 0); + + return 0; +} + +static inline unsigned long acrn_get_tsc_khz(void) +{ + return cpuid_eax(ACRN_CPUID_TIMING_INFO); +} + +/* + * Hypercalls for ACRN + * + * - VMCALL instruction is used to implement ACRN hypercalls. + * - ACRN hypercall ABI: + * - Hypercall number is passed in R8 register. + * - Up to 2 arguments are passed in RDI, RSI. + * - Return value will be placed in RAX. + * + * Because GCC doesn't support R8 register as direct register constraints, use + * supported constraint as input with a explicit MOV to R8 in beginning of asm. + */ +static inline long acrn_hypercall0(unsigned long hcall_id) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall1(unsigned long hcall_id, + unsigned long param1) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall2(unsigned long hcall_id, + unsigned long param1, + unsigned long param2) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1), "S" (param2) + : "r8", "memory"); + + return result; +} + +#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h new file mode 100644 index 000000000..cd7b14322 --- /dev/null +++ b/arch/x86/include/asm/agp.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H + +#include +#include + +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cacheability attributes for the same + * page. This avoids data corruption on some CPUs. + */ + +#define map_page_into_agp(page) set_pages_uc(page, 1) +#define unmap_page_from_agp(page) set_pages_wb(page, 1) + +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ +#define flush_agp_cache() wbinvd() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h new file mode 100644 index 000000000..9542c582d --- /dev/null +++ b/arch/x86/include/asm/alternative.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ALTERNATIVE_H +#define _ASM_X86_ALTERNATIVE_H + +#include +#include +#include + +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +#ifndef __ASSEMBLY__ + +#include + +/* + * Alternative inline assembly for SMP. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running a SMP kernel. The existing apply_alternatives() + * works fine for patching a SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX_HERE \ + ".pushsection .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".popsection\n" \ + "671:" + +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define LOCK_PREFIX_HERE "" +#define LOCK_PREFIX "" +#endif + +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.ignore_alts\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_ibt_endbr(s32 *start, s32 *end); + +struct module; + +#ifdef CONFIG_SMP +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_enable_smp(void); +extern int alternatives_text_reserved(void *start, void *end); +extern bool skip_smp_alternatives; +#else +static inline void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) {} +static inline void alternatives_smp_module_del(struct module *mod) {} +static inline void alternatives_enable_smp(void) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} +#endif /* CONFIG_SMP */ + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define OLDINSTR(oldinstr, num) \ + "# ALT: oldnstr\n" \ + "661:\n\t" oldinstr "\n662:\n" \ + "# ALT: padding\n" \ + ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \ + alt_end_marker ":\n" + +/* + * gas compatible max based on the idea from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. + */ +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "# ALT: oldinstr2\n" \ + "661:\n\t" oldinstr "\n662:\n" \ + "# ALT: padding2\n" \ + ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ + alt_end_marker ":\n" + +#define OLDINSTR_3(oldinsn, n1, n2, n3) \ + "# ALT: oldinstr3\n" \ + "661:\n\t" oldinsn "\n662:\n" \ + "# ALT: padding3\n" \ + ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ + " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ + " - (" alt_slen ")), 0x90\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \ + "# ALT: replacement " #num "\n" \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr, 1) \ + ".popsection\n" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, 2) \ + ".popsection\n" + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature) + +#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ + OLDINSTR_3(oldinsn, 1, 2, 3) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feat1, 1) \ + ALTINSTR_ENTRY(feat2, 2) \ + ALTINSTR_ENTRY(feat3, 3) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinsn1, 1) \ + ALTINSTR_REPLACEMENT(newinsn2, 2) \ + ALTINSTR_REPLACEMENT(newinsn3, 3) \ + ".popsection\n" + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") + +#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \ + asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory") + +/* + * Alternative inline assembly with input. + * + * Peculiarities: + * No memory clobber here. + * Argument numbers start with 1. + * Leaving an unused argument 0 to keep API compatibility. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) + +/* + * This is similar to alternative_input. But it has two features and + * respective instructions. + * + * If CPU has feature2, newinstr2 is used. + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. + */ +#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ + feature2, input...) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ + : : "i" (0), ## input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) + +/* Like alternative_io, but for replacing a direct call with another one. */ +#define alternative_call(oldfunc, newfunc, feature, output, input...) \ + asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + +/* + * Like alternative_call, but there are two features and respective functions. + * If CPU has feature2, function2 is used. + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ + output, input...) \ + asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ + "call %P[new2]", feature2) \ + : output, ASM_CALL_CONSTRAINT \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input) + +/* + * use this macro(s) if you need more than one output parameter + * in alternative_io + */ +#define ASM_OUTPUT2(a...) a + +/* + * use this macro if you need clobbers but no inputs in + * alternative_{input,io,call}() + */ +#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .pushsection .smp_locks,"a" + .balign 4 + .long 672b - . + .popsection + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +.macro ANNOTATE_IGNORE_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.ignore_alts + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140: + \oldinstr +141: + .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr +144: + .popsection +.endm + +#define old_len 141b-140b +#define new_len1 144f-143f +#define new_len2 145f-144f + +/* + * gas compatible max based on the idea from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. + */ +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140: + \oldinstr +141: + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + .popsection +.endm + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h new file mode 100644 index 000000000..cb2a5e113 --- /dev/null +++ b/arch/x86/include/asm/amd-ibs.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include + +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + l3_miss_only:1, /* 59: Collect L3 miss samples only */ + fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */ + fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */ + reserved:2; /* 62-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + l3_miss_only:1, /* 16: Collect L3 miss samples only */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved0:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + reserved1:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 1 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src_lo:3, /* 0-2: data source low */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + data_src_hi:2, /* 6-7: data source high */ + reserved1:56; /* 8-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; diff --git a/arch/x86/include/asm/amd_hsmp.h b/arch/x86/include/asm/amd_hsmp.h new file mode 100644 index 000000000..03c2ce3ed --- /dev/null +++ b/arch/x86/include/asm/amd_hsmp.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _ASM_X86_AMD_HSMP_H_ +#define _ASM_X86_AMD_HSMP_H_ + +#include + +#if IS_ENABLED(CONFIG_AMD_HSMP) +int hsmp_send_message(struct hsmp_message *msg); +#else +static inline int hsmp_send_message(struct hsmp_message *msg) +{ + return -ENODEV; +} +#endif +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h new file mode 100644 index 000000000..ed0eaf65c --- /dev/null +++ b/arch/x86/include/asm/amd_nb.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include +#include +#include + +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; + +extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); +extern void amd_flush_garts(void); +extern int amd_numa_init(void); +extern int amd_get_subcaches(int); +extern int amd_set_subcaches(int, unsigned long); + +extern int amd_smn_read(u16 node, u32 address, u32 *value); +extern int amd_smn_write(u16 node, u32 address, u32 value); + +struct amd_l3_cache { + unsigned indices; + u8 subcaches[4]; +}; + +struct threshold_block { + unsigned int block; /* Number within bank */ + unsigned int bank; /* MCA bank the block belongs to */ + unsigned int cpu; /* CPU which controls MCA bank */ + u32 address; /* MSR address for the block */ + u16 interrupt_enable; /* Enable/Disable APIC interrupt */ + bool interrupt_capable; /* Bank can generate an interrupt. */ + + u16 threshold_limit; /* + * Value upon which threshold + * interrupt is generated. + */ + + struct kobject kobj; /* sysfs object */ + struct list_head miscj; /* + * List of threshold blocks + * within a bank. + */ +}; + +struct threshold_bank { + struct kobject *kobj; + struct threshold_block *blocks; + + /* initialized to the number of CPUs on the node sharing this bank */ + refcount_t cpus; + unsigned int shared; +}; + +struct amd_northbridge { + struct pci_dev *root; + struct pci_dev *misc; + struct pci_dev *link; + struct amd_l3_cache l3_cache; + struct threshold_bank *bank4; +}; + +struct amd_northbridge_info { + u16 num; + u64 flags; + struct amd_northbridge *nb; +}; + +#define AMD_NB_GART BIT(0) +#define AMD_NB_L3_INDEX_DISABLE BIT(1) +#define AMD_NB_L3_PARTITIONING BIT(2) + +#ifdef CONFIG_AMD_NB + +u16 amd_nb_num(void); +bool amd_nb_has_feature(unsigned int feature); +struct amd_northbridge *node_to_amd_nb(int node); + +static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) +{ + struct pci_dev *misc; + int i; + + for (i = 0; i != amd_nb_num(); i++) { + misc = node_to_amd_nb(i)->misc; + + if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + } + + WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); + return 0; +} + +static inline bool amd_gart_present(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return false; + + /* GART present only on Fam15h, upto model 0fh */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) + return true; + + return false; +} + +#else + +#define amd_nb_num(x) 0 +#define amd_nb_has_feature(x) false +#define node_to_amd_nb(x) NULL +#define amd_gart_present(x) false + +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h new file mode 100644 index 000000000..3216da707 --- /dev/null +++ b/arch/x86/include/asm/apic.h @@ -0,0 +1,525 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_APIC_H +#define _ASM_X86_APIC_H + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ARCH_APICTIMER_STOPS_ON_C3 1 + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) +extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +extern int apic_verbosity; +extern int local_apic_timer_c2_ok; + +extern int disable_apic; +extern unsigned int lapic_timer_period; + +extern enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id { + APIC_PIC, + APIC_VIRTUAL_WIRE, + APIC_VIRTUAL_WIRE_NO_CONFIG, + APIC_SYMMETRIC_IO, + APIC_SYMMETRIC_IO_NO_ROUTING +}; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + +/* + * With 82489DX we can't rely on apic feature bit + * retrieved via cpuid but still have to deal with + * such an apic chip so we assume that SMP configuration + * is found from MP table (64bit case uses ACPI mostly + * which set smp presence flag as well so we are safe + * to use this helper too). + */ +static inline bool apic_from_smp_config(void) +{ + return smp_found_config && !disable_apic; +} + +/* + * Basic functions accessing APICs. + */ +#ifdef CONFIG_PARAVIRT +#include +#endif + +static inline void native_apic_mem_write(u32 reg, u32 v) +{ + volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); + + alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, + ASM_OUTPUT2("=r" (v), "=m" (*addr)), + ASM_OUTPUT2("0" (v), "m" (*addr))); +} + +static inline u32 native_apic_mem_read(u32 reg) +{ + return *((volatile u32 *)(APIC_BASE + reg)); +} + +extern void native_apic_wait_icr_idle(void); +extern u32 native_safe_apic_wait_icr_idle(void); +extern void native_apic_icr_write(u32 low, u32 id); +extern u64 native_apic_icr_read(void); + +static inline bool apic_is_x2apic_enabled(void) +{ + u64 msr; + + if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) + return false; + return msr & X2APIC_ENABLE; +} + +extern void enable_IR_x2apic(void); + +extern int get_physical_broadcast(void); + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void apic_soft_disable(void); +extern void lapic_shutdown(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); +extern void apic_intr_mode_init(void); +extern void init_apic_mappings(void); +void register_lapic_address(unsigned long address); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern void lapic_update_tsc_freq(void); + +#ifdef CONFIG_X86_64 +static inline int apic_force_enable(unsigned long addr) +{ + return -1; +} +#else +extern int apic_force_enable(unsigned long addr); +#endif + +extern void apic_ap_setup(void); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); +extern void lapic_assign_system_vectors(void); +extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); +extern void lapic_online(void); +extern void lapic_offline(void); +extern bool apic_needs_pit(void); + +extern void apic_send_IPI_allbutself(unsigned int vector); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop +static inline void lapic_update_tsc_freq(void) { } +static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } +static inline void apic_intr_mode_init(void) { } +static inline void lapic_assign_system_vectors(void) { } +static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_X2APIC +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline void native_apic_msr_eoi_write(u32 reg, u32 v) +{ + __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u64 msr; + + if (reg == APIC_DFR) + return -1; + + rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + return (u32)msr; +} + +static inline void native_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static inline u32 native_safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +static inline void native_x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +static inline u64 native_x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +extern int x2apic_mode; +extern int x2apic_phys; +extern void __init x2apic_set_max_apicid(u32 apicid); +extern void x2apic_setup(void); +static inline int x2apic_enabled(void) +{ + return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled(); +} + +#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) +#else /* !CONFIG_X86_X2APIC */ +static inline void x2apic_setup(void) { } +static inline int x2apic_enabled(void) { return 0; } + +#define x2apic_mode (0) +#define x2apic_supported() (0) +#endif /* !CONFIG_X86_X2APIC */ +extern void __init check_x2apic(void); + +struct irq_data; + +/* + * Copyright 2004 James Cleverdon, IBM. + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +struct apic { + /* Hotpath functions first */ + void (*eoi_write)(u32 reg, u32 v); + void (*native_eoi_write)(u32 reg, u32 v); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + + /* IPI related functions */ + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); + + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + u32 disable_esr; + + enum apic_delivery_modes delivery_mode; + bool dest_mode_logical; + + u32 (*calc_dest_apicid)(unsigned int cpu); + + /* ICR related functions */ + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + + /* Probe, setup and smpboot functions */ + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(u32 apicid); + int (*apic_id_registered)(void); + + bool (*check_apicid_used)(physid_mask_t *map, int apicid); + void (*init_apic_ldr)(void); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + void (*setup_apic_routing)(void); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + int (*check_phys_apicid_present)(int phys_apicid); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + u32 (*get_apic_id)(unsigned long x); + u32 (*set_apic_id)(unsigned int id); + + /* wakeup_secondary_cpu */ + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + /* wakeup secondary CPU using 64-bit wakeup point */ + int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip); + + void (*inquire_remote_apic)(int apicid); + +#ifdef CONFIG_X86_32 + /* + * Called very early during boot from get_smp_config(). It should + * return the logical apicid. x86_[bios]_cpu_to_apicid is + * initialized before this function is called. + * + * If logical apicid can't be determined that early, the function + * may return BAD_APICID. Logical apicid will be configured after + * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity + * won't be applied properly during early boot in this case. + */ + int (*x86_32_early_logical_apicid)(int cpu); +#endif + char *name; +}; + +/* + * Pointer to the local APIC driver in use on this system (there's + * always just one such driver in use - the kernel decides via an + * early probing process which one it picks - and then sticks to it): + */ +extern struct apic *apic; + +/* + * APIC drivers are probed based on how they are listed in the .apicdrivers + * section. So the order is important and enforced by the ordering + * of different apic driver files in the Makefile. + * + * For the files having two apic drivers, we use apic_drivers() + * to enforce the order with in them. + */ +#define apic_driver(sym) \ + static const struct apic *__apicdrivers_##sym __used \ + __aligned(sizeof(struct apic *)) \ + __section(".apicdrivers") = { &sym } + +#define apic_drivers(sym1, sym2) \ + static struct apic *__apicdrivers_##sym1##sym2[2] __used \ + __aligned(sizeof(struct apic *)) \ + __section(".apicdrivers") = { &sym1, &sym2 } + +extern struct apic *__apicdrivers[], *__apicdrivers_end[]; + +/* + * APIC functionality to boot other CPUs - only used on SMP: + */ +#ifdef CONFIG_SMP +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int lapic_can_unplug_cpu(void); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline void apic_eoi(void) +{ + apic->eoi_write(APIC_EOI, APIC_EOI_ACK); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + +extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); + +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline void apic_eoi(void) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } +static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} + +#endif /* CONFIG_X86_LOCAL_APIC */ + +extern void apic_ack_irq(struct irq_data *data); + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + apic_eoi(); +} + + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + + return !!(irr & (1U << (vector % 32))); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +/* + * Warm reset vector position: + */ +#define TRAMPOLINE_PHYS_LOW 0x467 +#define TRAMPOLINE_PHYS_HIGH 0x469 + +extern void generic_bigsmp_probe(void); + +#ifdef CONFIG_X86_LOCAL_APIC + +#include + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); + +extern struct apic apic_noop; + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg = apic_read(APIC_ID); + + return apic->get_apic_id(reg); +} + +#ifdef CONFIG_X86_64 +typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip); +extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler); +#endif + +extern int default_apic_id_valid(u32 apicid); +extern int default_acpi_madt_oem_check(char *, char *); +extern void default_setup_apic_routing(void); + +extern u32 apic_default_calc_apicid(unsigned int cpu); +extern u32 apic_flat_calc_apicid(unsigned int cpu); + +extern bool default_check_apicid_used(physid_mask_t *map, int apicid); +extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_SMP +bool apic_id_is_primary_thread(unsigned int id); +void apic_smt_update(void); +#else +static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +static inline void apic_smt_update(void) { } +#endif + +struct msi_msg; +struct irq_cfg; + +extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, + bool dmar); + +extern void ioapic_zap_locks(void); + +#endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h new file mode 100644 index 000000000..68d213e83 --- /dev/null +++ b/arch/x86/include/asm/apicdef.h @@ -0,0 +1,438 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. + * Ingo Molnar , 1999, 2000 + */ + +#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + +#define APIC_ID 0x20 + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define APIC_LVR_DIRECTED_EOI (1 << 24) +#define GET_APIC_VERSION(x) ((x) & 0xFFu) +#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu) +#ifdef CONFIG_X86_32 +# define APIC_INTEGRATED(x) ((x) & 0xF0u) +#else +# define APIC_INTEGRATED(x) (1) +#endif +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_EXT_SPACE(x) ((x) & 0x80000000) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EOI_ACK 0x0 /* Docs say 0 for future compat. */ +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu << 24) +#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x) << 24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_DIRECTED_EOI (1 << 12) +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_LVTCMCI 0x2f0 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_XAPIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_LVT_LEVEL_TRIGGER (1 << 15) +#define APIC_LVT_REMOTE_IRR (1 << 14) +#define APIC_INPUT_POLARITY (1 << 13) +#define APIC_SEND_PENDING (1 << 12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x) >> 8) & 0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x) & ~0x700) | ((y) << 8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 +#define APIC_TDR_DIV_TMBASE (1 << 2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EFEAT 0x400 +#define APIC_ECTRL 0x410 +#define APIC_EILVTn(n) (0x500 + 0x10 * n) +#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H +#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1 << 16) + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define XAPIC_ENABLE (1UL << 11) +#define X2APIC_ENABLE (1UL << 10) + +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +# define MAX_LOCAL_APIC 256 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +/* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. + */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { /* LVT - Thermal Sensor */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_thermal; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#ifdef CONFIG_X86_32 + #define BAD_APICID 0xFFu +#else + #define BAD_APICID 0xFFFFu +#endif + +enum apic_delivery_modes { + APIC_DELIVERY_MODE_FIXED = 0, + APIC_DELIVERY_MODE_LOWESTPRIO = 1, + APIC_DELIVERY_MODE_SMI = 2, + APIC_DELIVERY_MODE_NMI = 4, + APIC_DELIVERY_MODE_INIT = 5, + APIC_DELIVERY_MODE_EXTINT = 7, +}; + +#endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h new file mode 100644 index 000000000..4d4015ddc --- /dev/null +++ b/arch/x86/include/asm/apm.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Machine specific APM BIOS functions for generic. + * Split out from apm.c by Osamu Tomita + */ + +#ifndef _ASM_X86_MACH_DEFAULT_APM_H +#define _ASM_X86_MACH_DEFAULT_APM_H + +#ifdef APM_ZERO_SEGS +# define APM_DO_ZERO_SEGS \ + "pushl %%ds\n\t" \ + "pushl %%es\n\t" \ + "xorl %%edx, %%edx\n\t" \ + "mov %%dx, %%ds\n\t" \ + "mov %%dx, %%es\n\t" \ + "mov %%dx, %%fs\n\t" \ + "mov %%dx, %%gs\n\t" +# define APM_DO_POP_SEGS \ + "popl %%es\n\t" \ + "popl %%ds\n\t" +#else +# define APM_DO_ZERO_SEGS +# define APM_DO_POP_SEGS +#endif + +static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + u32 *eax, u32 *ebx, u32 *ecx, + u32 *edx, u32 *esi) +{ + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), + "=S" (*esi) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); +} + +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) +{ + int cx, dx, si; + bool error; + + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), + "=S" (si) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); + return error; +} + +#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 000000000..ba88edd0d --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HWEIGHT_H +#define _ASM_X86_HWEIGHT_H + +#include + +#ifdef CONFIG_64BIT +#define REG_IN "D" +#define REG_OUT "a" +#else +#define REG_IN "a" +#define REG_OUT "a" +#endif + +static __always_inline unsigned int __arch_hweight32(unsigned int w) +{ + unsigned int res; + + asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +#ifdef CONFIG_X86_32 +static inline unsigned long __arch_hweight64(__u64 w) +{ + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +} +#else +static __always_inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res; + + asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} +#endif /* CONFIG_X86_32 */ + +#endif diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 000000000..02bae8e07 --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011-2014, Intel Corporation + * Authors: Fenghua Yu , + * H. Peter Anvin + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include +#include + +#define RDRAND_RETRY_LOOPS 10 + +/* Unconditional execution of RDRAND and RDSEED */ + +static inline bool __must_check rdrand_long(unsigned long *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile("rdrand %[out]" + CC_SET(c) + : CC_OUT(c) (ok), [out] "=r" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool __must_check rdseed_long(unsigned long *v) +{ + bool ok; + asm volatile("rdseed %[out]" + CC_SET(c) + : CC_OUT(c) (ok), [out] "=r" (*v)); + return ok; +} + +/* + * These are the generic interfaces; they must not be declared if the + * stubs in are to be invoked. + */ + +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) +{ + return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0; +} + +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) +{ + return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 1 : 0; +} + +#ifndef CONFIG_UML +void x86_init_rdrand(struct cpuinfo_x86 *c); +#endif + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 000000000..d370ee36a --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h new file mode 100644 index 000000000..8f80de627 --- /dev/null +++ b/arch/x86/include/asm/asm-prototypes.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG64 +extern void cmpxchg8b_emu(void); +#endif + diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h new file mode 100644 index 000000000..fbcfec4dc --- /dev/null +++ b/arch/x86/include/asm/asm.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +# define __ASM_REGPFX % +#else +#include +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," +# define __ASM_REGPFX %% +#endif + +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + +#ifndef __x86_64__ +/* 32 bit */ +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +#else +/* 64 bit */ +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +#endif + +#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ + inst##q##__VA_ARGS__) +#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) + +#define _ASM_MOV __ASM_SIZE(mov) +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_MUL __ASM_SIZE(mul) + +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) + +/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ +#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) + +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + +#ifdef __KERNEL__ + +# include + +/* Exception table entry */ +#ifdef __ASSEMBLY__ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + .pushsection "__ex_table","a" ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long type ; \ + .popsection + +# ifdef CONFIG_KPROBES +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection +# else +# define _ASM_NOKPROBE(entry) +# endif + +#else /* ! __ASSEMBLY__ */ + +# define DEFINE_EXTABLE_TYPE_REG \ + ".macro extable_type_reg type:req reg:req\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".if (.Lfound != 1)\n" \ + ".error \"extable_type_reg: bad register argument\"\n" \ + ".endif\n" \ + ".endm\n" + +# define UNDEFINE_EXTABLE_TYPE_REG \ + ".purgem extable_type_reg\n" + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + " .pushsection \"__ex_table\",\"a\"\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + " .long " __stringify(type) " \n" \ + " .popsection\n" + +# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ + " .pushsection \"__ex_table\",\"a\"\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + DEFINE_EXTABLE_TYPE_REG \ + "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\ + UNDEFINE_EXTABLE_TYPE_REG \ + " .popsection\n" + +/* For C file, we already have NOKPROBE_SYMBOL macro */ + +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif /* __ASSEMBLY__ */ + +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) + +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) + +#define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h new file mode 100644 index 000000000..5e754e895 --- /dev/null +++ b/arch/x86/include/asm/atomic.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ATOMIC_H +#define _ASM_X86_ATOMIC_H + +#include +#include +#include +#include +#include +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +/** + * arch_atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static __always_inline int arch_atomic_read(const atomic_t *v) +{ + /* + * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, + * it's non-inlined function that increases binary size and stack usage. + */ + return __READ_ONCE((v)->counter); +} + +/** + * arch_atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static __always_inline void arch_atomic_set(atomic_t *v, int i) +{ + __WRITE_ONCE(v->counter, i); +} + +/** + * arch_atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static __always_inline void arch_atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (v->counter) + : "ir" (i) : "memory"); +} + +/** + * arch_atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __always_inline void arch_atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "+m" (v->counter) + : "ir" (i) : "memory"); +} + +/** + * arch_atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); +} +#define arch_atomic_sub_and_test arch_atomic_sub_and_test + +/** + * arch_atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static __always_inline void arch_atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter) :: "memory"); +} +#define arch_atomic_inc arch_atomic_inc + +/** + * arch_atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static __always_inline void arch_atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter) :: "memory"); +} +#define arch_atomic_dec arch_atomic_dec + +/** + * arch_atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) +{ + return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); +} +#define arch_atomic_dec_and_test arch_atomic_dec_and_test + +/** + * arch_atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) +{ + return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); +} +#define arch_atomic_inc_and_test arch_atomic_inc_and_test + +/** + * arch_atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); +} +#define arch_atomic_add_negative arch_atomic_add_negative + +/** + * arch_atomic_add_return - add integer and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) +{ + return i + xadd(&v->counter, i); +} +#define arch_atomic_add_return arch_atomic_add_return + +/** + * arch_atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) +{ + return arch_atomic_add_return(-i, v); +} +#define arch_atomic_sub_return arch_atomic_sub_return + +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} +#define arch_atomic_fetch_add arch_atomic_fetch_add + +static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return arch_cmpxchg(&v->counter, old, new); +} +#define arch_atomic_cmpxchg arch_atomic_cmpxchg + +static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + return arch_try_cmpxchg(&v->counter, old, new); +} +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg + +static __always_inline int arch_atomic_xchg(atomic_t *v, int new) +{ + return arch_xchg(&v->counter, new); +} +#define arch_atomic_xchg arch_atomic_xchg + +static __always_inline void arch_atomic_and(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "andl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); +} + +static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) +{ + int val = arch_atomic_read(v); + + do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); + + return val; +} +#define arch_atomic_fetch_and arch_atomic_fetch_and + +static __always_inline void arch_atomic_or(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "orl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); +} + +static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) +{ + int val = arch_atomic_read(v); + + do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); + + return val; +} +#define arch_atomic_fetch_or arch_atomic_fetch_or + +static __always_inline void arch_atomic_xor(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "xorl %1,%0" + : "+m" (v->counter) + : "ir" (i) + : "memory"); +} + +static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) +{ + int val = arch_atomic_read(v); + + do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); + + return val; +} +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h new file mode 100644 index 000000000..5efd01b54 --- /dev/null +++ b/arch/x86/include/asm/atomic64_32.h @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ATOMIC64_32_H +#define _ASM_X86_ATOMIC64_32_H + +#include +#include +//#include + +/* An 64bit atomic type */ + +typedef struct { + s64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) +#ifndef ATOMIC64_EXPORT +#define ATOMIC64_DECL_ONE __ATOMIC64_DECL +#else +#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \ + ATOMIC64_EXPORT(atomic64_##sym) +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +#define __alternative_atomic64(f, g, out, in...) \ + asm volatile("call %P[func]" \ + : out : [func] "i" (atomic64_##g##_cx8), ## in) + +#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) +#else +#define __alternative_atomic64(f, g, out, in...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) + +#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ + ATOMIC64_DECL_ONE(sym##_386) + +ATOMIC64_DECL_ONE(add_386); +ATOMIC64_DECL_ONE(sub_386); +ATOMIC64_DECL_ONE(inc_386); +ATOMIC64_DECL_ONE(dec_386); +#endif + +#define alternative_atomic64(f, out, in...) \ + __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) + +ATOMIC64_DECL(read); +ATOMIC64_DECL(set); +ATOMIC64_DECL(xchg); +ATOMIC64_DECL(add_return); +ATOMIC64_DECL(sub_return); +ATOMIC64_DECL(inc_return); +ATOMIC64_DECL(dec_return); +ATOMIC64_DECL(dec_if_positive); +ATOMIC64_DECL(inc_not_zero); +ATOMIC64_DECL(add_unless); + +#undef ATOMIC64_DECL +#undef ATOMIC64_DECL_ONE +#undef __ATOMIC64_DECL +#undef ATOMIC64_EXPORT + +/** + * arch_atomic64_cmpxchg - cmpxchg atomic64 variable + * @v: pointer to type atomic64_t + * @o: expected value + * @n: new value + * + * Atomically sets @v to @n if it was equal to @o and returns + * the old value. + */ + +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) +{ + return arch_cmpxchg64(&v->counter, o, n); +} +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg + +/** + * arch_atomic64_xchg - xchg atomic64 variable + * @v: pointer to type atomic64_t + * @n: value to assign + * + * Atomically xchgs the value of @v to @n and returns + * the old value. + */ +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) +{ + s64 o; + unsigned high = (unsigned)(n >> 32); + unsigned low = (unsigned)n; + alternative_atomic64(xchg, "=&A" (o), + "S" (v), "b" (low), "c" (high) + : "memory"); + return o; +} +#define arch_atomic64_xchg arch_atomic64_xchg + +/** + * arch_atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: value to assign + * + * Atomically sets the value of @v to @n. + */ +static inline void arch_atomic64_set(atomic64_t *v, s64 i) +{ + unsigned high = (unsigned)(i >> 32); + unsigned low = (unsigned)i; + alternative_atomic64(set, /* no output */, + "S" (v), "b" (low), "c" (high) + : "eax", "edx", "memory"); +} + +/** + * arch_atomic64_read - read atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically reads the value of @v and returns it. + */ +static inline s64 arch_atomic64_read(const atomic64_t *v) +{ + s64 r; + alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); + return r; +} + +/** + * arch_atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + *@v + */ +static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +{ + alternative_atomic64(add_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); + return i; +} +#define arch_atomic64_add_return arch_atomic64_add_return + +/* + * Other variants with different arithmetic operators: + */ +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +{ + alternative_atomic64(sub_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); + return i; +} +#define arch_atomic64_sub_return arch_atomic64_sub_return + +static inline s64 arch_atomic64_inc_return(atomic64_t *v) +{ + s64 a; + alternative_atomic64(inc_return, "=&A" (a), + "S" (v) : "memory", "ecx"); + return a; +} +#define arch_atomic64_inc_return arch_atomic64_inc_return + +static inline s64 arch_atomic64_dec_return(atomic64_t *v) +{ + s64 a; + alternative_atomic64(dec_return, "=&A" (a), + "S" (v) : "memory", "ecx"); + return a; +} +#define arch_atomic64_dec_return arch_atomic64_dec_return + +/** + * arch_atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) +{ + __alternative_atomic64(add, add_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); + return i; +} + +/** + * arch_atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) +{ + __alternative_atomic64(sub, sub_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); + return i; +} + +/** + * arch_atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void arch_atomic64_inc(atomic64_t *v) +{ + __alternative_atomic64(inc, inc_return, /* no output */, + "S" (v) : "memory", "eax", "ecx", "edx"); +} +#define arch_atomic64_inc arch_atomic64_inc + +/** + * arch_atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void arch_atomic64_dec(atomic64_t *v) +{ + __alternative_atomic64(dec, dec_return, /* no output */, + "S" (v) : "memory", "eax", "ecx", "edx"); +} +#define arch_atomic64_dec arch_atomic64_dec + +/** + * arch_atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if the add was done, zero otherwise. + */ +static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +{ + unsigned low = (unsigned)u; + unsigned high = (unsigned)(u >> 32); + alternative_atomic64(add_unless, + ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), + "S" (v) : "memory"); + return (int)a; +} +#define arch_atomic64_add_unless arch_atomic64_add_unless + +static inline int arch_atomic64_inc_not_zero(atomic64_t *v) +{ + int r; + alternative_atomic64(inc_not_zero, "=&a" (r), + "S" (v) : "ecx", "edx", "memory"); + return r; +} +#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero + +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +{ + s64 r; + alternative_atomic64(dec_if_positive, "=&A" (r), + "S" (v) : "ecx", "memory"); + return r; +} +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +#undef alternative_atomic64 +#undef __alternative_atomic64 + +static inline void arch_atomic64_and(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) + c = old; +} + +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) + c = old; + + return old; +} +#define arch_atomic64_fetch_and arch_atomic64_fetch_and + +static inline void arch_atomic64_or(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) + c = old; +} + +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) + c = old; + + return old; +} +#define arch_atomic64_fetch_or arch_atomic64_fetch_or + +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) + c = old; +} + +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) + c = old; + + return old; +} +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +{ + s64 old, c = 0; + + while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) + c = old; + + return old; +} +#define arch_atomic64_fetch_add arch_atomic64_fetch_add + +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) + +#endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h new file mode 100644 index 000000000..7886d0578 --- /dev/null +++ b/arch/x86/include/asm/atomic64_64.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ATOMIC64_64_H +#define _ASM_X86_ATOMIC64_64_H + +#include +#include +#include + +/* The 64-bit atomic type */ + +#define ATOMIC64_INIT(i) { (i) } + +/** + * arch_atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +static inline s64 arch_atomic64_read(const atomic64_t *v) +{ + return __READ_ONCE((v)->counter); +} + +/** + * arch_atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void arch_atomic64_set(atomic64_t *v, s64 i) +{ + __WRITE_ONCE(v->counter, i); +} + +/** + * arch_atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "addq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter) : "memory"); +} + +/** + * arch_atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline void arch_atomic64_sub(s64 i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter) : "memory"); +} + +/** + * arch_atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); +} +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test + +/** + * arch_atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static __always_inline void arch_atomic64_inc(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "incq %0" + : "=m" (v->counter) + : "m" (v->counter) : "memory"); +} +#define arch_atomic64_inc arch_atomic64_inc + +/** + * arch_atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static __always_inline void arch_atomic64_dec(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0" + : "=m" (v->counter) + : "m" (v->counter) : "memory"); +} +#define arch_atomic64_dec arch_atomic64_dec + +/** + * arch_atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool arch_atomic64_dec_and_test(atomic64_t *v) +{ + return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); +} +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test + +/** + * arch_atomic64_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool arch_atomic64_inc_and_test(atomic64_t *v) +{ + return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); +} +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + +/** + * arch_atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); +} +#define arch_atomic64_add_negative arch_atomic64_add_negative + +/** + * arch_atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +{ + return i + xadd(&v->counter, i); +} +#define arch_atomic64_add_return arch_atomic64_add_return + +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +{ + return arch_atomic64_add_return(-i, v); +} +#define arch_atomic64_sub_return arch_atomic64_sub_return + +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} +#define arch_atomic64_fetch_add arch_atomic64_fetch_add + +static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + return arch_cmpxchg(&v->counter, old, new); +} +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg + +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + return arch_try_cmpxchg(&v->counter, old, new); +} +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg + +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) +{ + return arch_xchg(&v->counter, new); +} +#define arch_atomic64_xchg arch_atomic64_xchg + +static inline void arch_atomic64_and(s64 i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "andq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); +} + +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) +{ + s64 val = arch_atomic64_read(v); + + do { + } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); + return val; +} +#define arch_atomic64_fetch_and arch_atomic64_fetch_and + +static inline void arch_atomic64_or(s64 i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "orq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); +} + +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +{ + s64 val = arch_atomic64_read(v); + + do { + } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); + return val; +} +#define arch_atomic64_fetch_or arch_atomic64_fetch_or + +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "xorq %1,%0" + : "+m" (v->counter) + : "er" (i) + : "memory"); +} + +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +{ + s64 val = arch_atomic64_read(v); + + do { + } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); + return val; +} +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + +#endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h new file mode 100644 index 000000000..36aec57ea --- /dev/null +++ b/arch/x86/include/asm/audit.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AUDIT_H +#define _ASM_X86_AUDIT_H + +int ia32_classify_syscall(unsigned int syscall); + +#endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h new file mode 100644 index 000000000..35389b2af --- /dev/null +++ b/arch/x86/include/asm/barrier.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BARRIER_H +#define _ASM_X86_BARRIER_H + +#include +#include + +/* + * Force strict CPU ordering. + * And yes, this might be required on UP too when we're talking + * to devices. + */ + +#ifdef CONFIG_X86_32 +#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#else +#define __mb() asm volatile("mfence":::"memory") +#define __rmb() asm volatile("lfence":::"memory") +#define __wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm volatile ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"g"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) + +#define __dma_rmb() barrier() +#define __dma_wmb() barrier() + +#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") + +#define __smp_rmb() dma_rmb() +#define __smp_wmb() barrier() +#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) + +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +/* Atomic operations are already serializing on x86 */ +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) + +#include + +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + asm volatile("mfence; lfence" : : : "memory"); +} + +#endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h new file mode 100644 index 000000000..4d5a17e2f --- /dev/null +++ b/arch/x86/include/asm/bios_ebda.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BIOS_EBDA_H +#define _ASM_X86_BIOS_EBDA_H + +#include + +/* + * Returns physical address of EBDA. Returns 0 if there is no EBDA. + */ +static inline unsigned int get_bios_ebda(void) +{ + /* + * There is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ + unsigned int address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + return address; /* 0 means none */ +} + +void reserve_bios_regions(void); + +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +/* + * This is obviously not a great place for this, but we want to be + * able to scatter it around anywhere in the kernel. + */ +void check_for_bios_corruption(void); +void start_periodic_check_for_corruption(void); +#else +static inline void check_for_bios_corruption(void) +{ +} + +static inline void start_periodic_check_for_corruption(void) +{ +} +#endif + +#endif /* _ASM_X86_BIOS_EBDA_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h new file mode 100644 index 000000000..2edf68475 --- /dev/null +++ b/arch/x86/include/asm/bitops.h @@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BITOPS_H +#define _ASM_X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. + */ + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include +#include +#include + +#if BITS_PER_LONG == 32 +# define _BITOPS_LONG_SHIFT 5 +#elif BITS_PER_LONG == 64 +# define _BITOPS_LONG_SHIFT 6 +#else +# error "Unexpected BITS_PER_LONG" +#endif + +#define BIT_64(n) (U64_C(1) << (n)) + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) + +#define ADDR RLONG_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) + +static __always_inline void +arch_set_bit(long nr, volatile unsigned long *addr) +{ + if (__builtin_constant_p(nr)) { + asm volatile(LOCK_PREFIX "orb %b1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" (CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } +} + +static __always_inline void +arch___set_bit(unsigned long nr, volatile unsigned long *addr) +{ + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); +} + +static __always_inline void +arch_clear_bit(long nr, volatile unsigned long *addr) +{ + if (__builtin_constant_p(nr)) { + asm volatile(LOCK_PREFIX "andb %b1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" (~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } +} + +static __always_inline void +arch_clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + barrier(); + arch_clear_bit(nr, addr); +} + +static __always_inline void +arch___clear_bit(unsigned long nr, volatile unsigned long *addr) +{ + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); +} + +static __always_inline bool +arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1" + CC_SET(s) + : CC_OUT(s) (negative), WBYTE_ADDR(addr) + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; +} +#define arch_clear_bit_unlock_is_negative_byte \ + arch_clear_bit_unlock_is_negative_byte + +static __always_inline void +arch___clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + arch___clear_bit(nr, addr); +} + +static __always_inline void +arch___change_bit(unsigned long nr, volatile unsigned long *addr) +{ + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); +} + +static __always_inline void +arch_change_bit(long nr, volatile unsigned long *addr) +{ + if (__builtin_constant_p(nr)) { + asm volatile(LOCK_PREFIX "xorb %b1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" (CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); + } +} + +static __always_inline bool +arch_test_and_set_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); +} + +static __always_inline bool +arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) +{ + return arch_test_and_set_bit(nr, addr); +} + +static __always_inline bool +arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) +{ + bool oldbit; + + asm(__ASM_SIZE(bts) " %2,%1" + CC_SET(c) + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); + return oldbit; +} + +static __always_inline bool +arch_test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); +} + +/* + * Note: the operation is performed atomically with respect to + * the local CPU, but not other CPUs. Portable code should not + * rely on this behaviour. + * KVM relies on this behaviour on x86 for modifying memory that is also + * accessed from a hypervisor on the same CPU if running in a VM: don't change + * this without also updating arch/x86/kernel/kvm.c + */ +static __always_inline bool +arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile(__ASM_SIZE(btr) " %2,%1" + CC_SET(c) + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); + return oldbit; +} + +static __always_inline bool +arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile(__ASM_SIZE(btc) " %2,%1" + CC_SET(c) + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); + + return oldbit; +} + +static __always_inline bool +arch_test_and_change_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); +} + +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & (BITS_PER_LONG-1))) & + (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; +} + +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) +{ + bool oldbit; + + asm volatile(__ASM_SIZE(bt) " %2,%1" + CC_SET(c) + : CC_OUT(c) (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); + + return oldbit; +} + +static __always_inline bool +arch_test_bit(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) : + variable_test_bit(nr, addr); +} + +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + asm("rep; bsf %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +/** + * __ffs - find first set bit in word + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable_ffz(unsigned long word) +{ + asm("rep; bsf %1,%0" + : "=r" (word) + : "r" (~word)); + return word; +} + +/** + * ffz - find first zero bit in word + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +#define ffz(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(~word) : \ + variable_ffz(word)) + +/* + * __fls: find last set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static __always_inline unsigned long __fls(unsigned long word) +{ + asm("bsr %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +#undef ADDR + +#ifdef __KERNEL__ +static __always_inline int variable_ffs(int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + asm("bsfl %1,%0" + : "=r" (r) + : "rm" (x), "0" (-1)); +#elif defined(CONFIG_X86_CMOV) + asm("bsfl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "r" (-1)); +#else + asm("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +/** + * fls - find last set bit in word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffs, but returns the position of the most significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 32. + */ +static __always_inline int fls(unsigned int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + asm("bsrl %1,%0" + : "=r" (r) + : "rm" (x), "0" (-1)); +#elif defined(CONFIG_X86_CMOV) + asm("bsrl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "rm" (-1)); +#else + asm("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +#ifdef CONFIG_X86_64 +static __always_inline int fls64(__u64 x) +{ + int bitpos = -1; + /* + * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before. + */ + asm("bsrq %1,%q0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} +#else +#include +#endif + +#include + +#include + +#include + +#include +#include +#include + +#include + +#include + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_BITOPS_H */ diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h new file mode 100644 index 000000000..215d37f7d --- /dev/null +++ b/arch/x86/include/asm/boot.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BOOT_H +#define _ASM_X86_BOOT_H + + +#include +#include + +/* Physical address where kernel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_X86_64 +# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +# define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) +# error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + +#if defined(CONFIG_KERNEL_BZIP2) +# define BOOT_HEAP_SIZE 0x400000 +#elif defined(CONFIG_KERNEL_ZSTD) +/* + * Zstd needs to allocate the ZSTD_DCtx in order to decompress the kernel. + * The ZSTD_DCtx is ~160KB, so set the heap size to 192KB because it is a + * round number and to allow some slack. + */ +# define BOOT_HEAP_SIZE 0x30000 +#else +# define BOOT_HEAP_SIZE 0x10000 +#endif + +#ifdef CONFIG_X86_64 +# define BOOT_STACK_SIZE 0x4000 + +/* + * Used by decompressor's startup_32() to allocate page tables for identity + * mapping of the 4G of RAM in 4-level paging mode: + * - 1 level4 table; + * - 1 level3 table; + * - 4 level2 table that maps everything with 2M pages; + * + * The additional level5 table needed for 5-level paging is allocated from + * trampoline_32bit memory. + */ +# define BOOT_INIT_PGT_SIZE (6*4096) + +/* + * Total number of page tables kernel_add_identity_map() can allocate, + * including page tables consumed by startup_32(). + * + * Worst-case scenario: + * - 5-level paging needs 1 level5 table; + * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel, + * assuming all of them cross 256T boundary: + * + 4*2 level4 table; + * + 4*2 level3 table; + * + 4*2 level2 table; + * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM): + * + 1 level4 table; + * + 1 level3 table; + * + 1 level2 table; + * Total: 28 tables + * + * Add 4 spare table in case decompressor touches anything beyond what is + * accounted above. Warn if it happens. + */ +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) + +#else /* !CONFIG_X86_64 */ +# define BOOT_STACK_SIZE 0x1000 +#endif + +#endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h new file mode 100644 index 000000000..53e9b0620 --- /dev/null +++ b/arch/x86/include/asm/bootparam_utils.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BOOTPARAM_UTILS_H +#define _ASM_X86_BOOTPARAM_UTILS_H + +#include + +/* + * This file is included from multiple environments. Do not + * add completing #includes to make it standalone. + */ + +/* + * Deal with bootloaders which fail to initialize unknown fields in + * boot_params to zero. The list fields in this list are taken from + * analysis of kexec-tools; if other broken bootloaders initialize a + * different set of fields we will need to figure out how to disambiguate. + * + * Note: efi_info is commonly left uninitialized, but that field has a + * private magic, so it is better to leave it unchanged. + */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + +static void sanitize_boot_params(struct boot_params *boot_params) +{ + /* + * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear + * this field. The purpose of this field is to guarantee + * compliance with the x86 boot spec located in + * Documentation/x86/boot.rst . That spec says that the + * *whole* structure should be cleared, after which only the + * portion defined by struct setup_header (boot_params->hdr) + * should be copied in. + * + * If you're having an issue because the sentinel is set, you + * need to change the whole structure to be cleared, not this + * (or any other) individual field, or you will soon have + * problems again. + */ + if (boot_params->sentinel) { + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(secure_boot), + BOOT_PARAM_PRESERVE(hdr), + BOOT_PARAM_PRESERVE(e820_table), + BOOT_PARAM_PRESERVE(eddbuf), + BOOT_PARAM_PRESERVE(cc_blob_address), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); + } +} + +#endif /* _ASM_X86_BOOTPARAM_UTILS_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h new file mode 100644 index 000000000..a3ec87d19 --- /dev/null +++ b/arch/x86/include/asm/bug.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BUG_H +#define _ASM_X86_BUG_H + +#include +#include +#include + +/* + * Despite that some emulators terminate on UD2, we use it for WARN(). + */ +#define ASM_UD2 ".byte 0x0f, 0x0b" +#define INSN_UD2 0x0b0f +#define LEN_UD2 2 + +#ifdef CONFIG_GENERIC_BUG + +#ifdef CONFIG_X86_32 +# define __BUG_REL(val) ".long " __stringify(val) +#else +# define __BUG_REL(val) ".long " __stringify(val) " - ." +#endif + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define _BUG_FLAGS(ins, flags, extra) \ +do { \ + asm_inline volatile("1:\t" ins "\n" \ + ".pushsection __bug_table,\"aw\"\n" \ + "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ + "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ + "\t.word %c1" "\t# bug_entry::line\n" \ + "\t.word %c2" "\t# bug_entry::flags\n" \ + "\t.org 2b+%c3\n" \ + ".popsection\n" \ + extra \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (flags), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#else /* !CONFIG_DEBUG_BUGVERBOSE */ + +#define _BUG_FLAGS(ins, flags, extra) \ +do { \ + asm_inline volatile("1:\t" ins "\n" \ + ".pushsection __bug_table,\"aw\"\n" \ + "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ + "\t.word %c0" "\t# bug_entry::flags\n" \ + "\t.org 2b+%c1\n" \ + ".popsection\n" \ + extra \ + : : "i" (flags), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#else + +#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins) + +#endif /* CONFIG_GENERIC_BUG */ + +#define HAVE_ARCH_BUG +#define BUG() \ +do { \ + instrumentation_begin(); \ + _BUG_FLAGS(ASM_UD2, 0, ""); \ + __builtin_unreachable(); \ +} while (0) + +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ +#define __WARN_FLAGS(flags) \ +do { \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ + instrumentation_begin(); \ + _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ + instrumentation_end(); \ +} while (0) + +#include + +#endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h new file mode 100644 index 000000000..f25ca2d70 --- /dev/null +++ b/arch/x86/include/asm/bugs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_BUGS_H +#define _ASM_X86_BUGS_H + +#include + +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) +int ppro_with_ram_bug(void); +#else +static inline int ppro_with_ram_bug(void) { return 0; } +#endif + +extern void cpu_bugs_smt_update(void); + +#endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h new file mode 100644 index 000000000..69404eae9 --- /dev/null +++ b/arch/x86/include/asm/cache.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHE_H +#define _ASM_X86_CACHE_H + +#include + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define __read_mostly __section(".data..read_mostly") + +#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT +#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) + +#ifdef CONFIG_X86_VSMP +#ifdef CONFIG_SMP +#define __cacheline_aligned_in_smp \ + __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ + __page_aligned_data +#endif +#endif + +#endif /* _ASM_X86_CACHE_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h new file mode 100644 index 000000000..b192d917a --- /dev/null +++ b/arch/x86/include/asm/cacheflush.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHEFLUSH_H +#define _ASM_X86_CACHEFLUSH_H + +#include + +/* Caches aren't brain-dead on the intel. */ +#include +#include + +void clflush_cache_range(void *addr, unsigned int size); + +#endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h new file mode 100644 index 000000000..86b2e0dcc --- /dev/null +++ b/arch/x86/include/asm/cacheinfo.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHEINFO_H +#define _ASM_X86_CACHEINFO_H + +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); + +#endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h new file mode 100644 index 000000000..2930f560d --- /dev/null +++ b/arch/x86/include/asm/ce4100.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_CE4100_H_ +#define _ASM_CE4100_H_ + +int ce4100_pci_init(void); + +#endif diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h new file mode 100644 index 000000000..58dacd90d --- /dev/null +++ b/arch/x86/include/asm/cfi.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CFI_H +#define _ASM_X86_CFI_H + +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2022 Google LLC + */ + +#include + +#ifdef CONFIG_CFI_CLANG +enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); +#else +static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) +{ + return BUG_TRAP_TYPE_NONE; +} +#endif /* CONFIG_CFI_CLANG */ + +#endif /* _ASM_X86_CFI_H */ diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h new file mode 100644 index 000000000..6df6ece8a --- /dev/null +++ b/arch/x86/include/asm/checksum.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_GENERIC_CSUM +# include +#else +# define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +# define HAVE_CSUM_COPY_USER +# define _HAVE_ARCH_CSUM_AND_COPY +# ifdef CONFIG_X86_32 +# include +# else +# include +# endif +#endif diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h new file mode 100644 index 000000000..17da95387 --- /dev/null +++ b/arch/x86/include/asm/checksum_32.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CHECKSUM_32_H +#define _ASM_X86_CHECKSUM_32_H + +#include +#include + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + return csum_partial_copy_generic(src, dst, len); +} + +static inline __wsum csum_and_copy_from_user(const void __user *src, + void *dst, int len) +{ + __wsum ret; + + might_sleep(); + if (!user_access_begin(src, len)) + return 0; + ret = csum_partial_copy_generic((__force void *)src, dst, len); + user_access_end(); + + return ret; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik , adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm volatile("movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0;\n" + "1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" + "2: ;\n" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), + "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum) +{ + asm("addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum) + : "memory"); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +static inline __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len) +{ + __wsum ret; + + might_sleep(); + if (!user_access_begin(dst, len)) + return 0; + + ret = csum_partial_copy_generic(src, (__force void *)dst, len); + user_access_end(); + return ret; +} + +#endif /* _ASM_X86_CHECKSUM_32_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h new file mode 100644 index 000000000..407beebad --- /dev/null +++ b/arch/x86/include/asm/checksum_64.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CHECKSUM_64_H +#define _ASM_X86_CHECKSUM_64_H + +/* + * Checksums for x86-64 + * Copyright 2002 by Andi Kleen, SuSE Labs + * with some code from asm-x86/checksum.h + */ + +#include +#include +#include + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + asm(" addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik , adapted for linux by + * Arnt Gulbrandsen. + */ + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm(" movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), + "g" ((len + proto)<<8), "0" (sum)); + return sum; +} + + +/** + * csum_tcpup_magic - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 16bit pseudo header checksum the input data already + * complemented and ready to be filled in. + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/** + * csum_partial - Compute an internet checksum. + * @buff: buffer to be checksummed + * @len: length of buffer. + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 32bit unfolded internet checksum of the buffer. + * Before filling it in it needs to be csum_fold()'ed. + * buff should be aligned to a 64bit boundary if possible. + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* Do not call this directly. Use the wrappers below */ +extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); + +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len); +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); + +/** + * ip_compute_csum - Compute an 16bit IP checksum. + * @buff: buffer address. + * @len: length of buffer. + * + * Returns the 16bit folded/inverted checksum of the passed buffer. + * Ready to fill in. + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +/** + * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: protocol of packet + * @sum: initial sum (32bit unfolded) to be added in + * + * Computes an IPv6 pseudo header checksum. This sum is added the checksum + * into UDP/TCP packets and contains some link layer information. + * Returns the unfolded 32bit checksum. + */ + +struct in6_addr; + +#define _HAVE_ARCH_IPV6_CSUM 1 +extern __sum16 +csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); + +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "rm" (b)); + return a; +} + +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ + return (__force __wsum)add32_with_carry((__force unsigned)csum, + (__force unsigned)addend); +} + +#endif /* _ASM_X86_CHECKSUM_64_H */ diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h new file mode 100644 index 000000000..dc9dc7b39 --- /dev/null +++ b/arch/x86/include/asm/clocksource.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* x86-specific clocksource additions */ + +#ifndef _ASM_X86_CLOCKSOURCE_H +#define _ASM_X86_CLOCKSOURCE_H + +#include + +extern unsigned int vclocks_used; + +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1U << vclock); +} + +static inline void vclocks_set_used(unsigned int which) +{ + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which)); +} + +#endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h new file mode 100644 index 000000000..6faaf27e8 --- /dev/null +++ b/arch/x86/include/asm/cmdline.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CMDLINE_H +#define _ASM_X86_CMDLINE_H + +int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); +int cmdline_find_option(const char *cmdline_ptr, const char *option, + char *buffer, int bufsize); + +#endif /* _ASM_X86_CMDLINE_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 000000000..94fbe6ae7 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_CMPXCHG_H +#define ASM_X86_CMPXCHG_H + +#include +#include +#include /* Provides LOCK_PREFIX */ + +/* + * Non-existent functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error(). + */ +extern void __xchg_wrong_size(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); +extern void __xadd_wrong_size(void) + __compiletime_error("Bad argument size for xadd"); +extern void __add_wrong_size(void) + __compiletime_error("Bad argument size for add"); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size it set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef CONFIG_64BIT +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * An exchange-type operation, which takes a value and a pointer, and + * returns the old value. + */ +#define __xchg_op(ptr, arg, op, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (arg); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_B: \ + asm volatile (lock #op "b %b0, %1\n" \ + : "+q" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_W: \ + asm volatile (lock #op "w %w0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_L: \ + asm volatile (lock #op "l %0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock #op "q %q0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + default: \ + __ ## op ## _wrong_size(); \ + } \ + __ret; \ + }) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. + */ +#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#define arch_cmpxchg(ptr, old, new) \ + __cmpxchg(ptr, old, new, sizeof(*(ptr))) + +#define arch_sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) + +#define arch_cmpxchg_local(ptr, old, new) \ + __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) + + +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ +({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(_ptr); \ + asm volatile(lock "cmpxchgb %[new], %[ptr]" \ + CC_SET(z) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*__ptr), \ + [old] "+a" (__old) \ + : [new] "q" (__new) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(_ptr); \ + asm volatile(lock "cmpxchgw %[new], %[ptr]" \ + CC_SET(z) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*__ptr), \ + [old] "+a" (__old) \ + : [new] "r" (__new) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(_ptr); \ + asm volatile(lock "cmpxchgl %[new], %[ptr]" \ + CC_SET(z) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*__ptr), \ + [old] "+a" (__old) \ + : [new] "r" (__new) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(_ptr); \ + asm volatile(lock "cmpxchgq %[new], %[ptr]" \ + CC_SET(z) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*__ptr), \ + [old] "+a" (__old) \ + : [new] "r" (__new) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); \ +}) + +#define __try_cmpxchg(ptr, pold, new, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) + +#define arch_try_cmpxchg(ptr, pold, new) \ + __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) + +/* + * xadd() adds "inc" to "*ptr" and atomically returns the previous + * value of "*ptr". + * + * xadd() is locked when multiple CPUs are online + */ +#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) +#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) + +#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ + __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ + VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ + asm volatile(pfx "cmpxchg%c5b %1" \ + CC_SET(e) \ + : CC_OUT(e) (__ret), \ + "+m" (*(p1)), "+m" (*(p2)), \ + "+a" (__old1), "+d" (__old2) \ + : "i" (2 * sizeof(long)), \ + "b" (__new1), "c" (__new2)); \ + __ret; \ +}) + +#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) + +#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(, p1, p2, o1, o2, n1, n2) + +#endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h new file mode 100644 index 000000000..215f5a657 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CMPXCHG_32_H +#define _ASM_X86_CMPXCHG_32_H + +/* + * Note: if you use set64_bit(), __cmpxchg64(), or their variants, + * you need to test for the feature in boot_cpu_data. + */ + +/* + * CMPXCHG8B only writes to the target if we had the previous + * value in registers, otherwise it acts as a read and gives us the + * "new previous" value. That is why there is a loop. Preloading + * EDX:EAX is a performance optimization: in the common case it means + * we need only one locked operation. + * + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very + * least an FPU save and/or %cr0.ts manipulation. + * + * cmpxchg8b must be used with the lock prefix here to allow the + * instruction to be executed atomically. We need to have the reader + * side to see the coherent 64bit value. + */ +static inline void set_64bit(volatile u64 *ptr, u64 value) +{ + u32 low = value; + u32 high = value >> 32; + u64 prev = *ptr; + + asm volatile("\n1:\t" + LOCK_PREFIX "cmpxchg8b %0\n\t" + "jnz 1b" + : "=m" (*ptr), "+A" (prev) + : "b" (low), "c" (high) + : "memory"); +} + +#ifdef CONFIG_X86_CMPXCHG64 +#define arch_cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#define arch_cmpxchg64_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#define arch_try_cmpxchg64(ptr, po, n) \ + __try_cmpxchg64((ptr), (unsigned long long *)(po), \ + (unsigned long long)(n)) +#endif + +static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + u64 prev; + asm volatile(LOCK_PREFIX "cmpxchg8b %1" + : "=A" (prev), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), + "0" (old) + : "memory"); + return prev; +} + +static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + u64 prev; + asm volatile("cmpxchg8b %1" + : "=A" (prev), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), + "0" (old) + : "memory"); + return prev; +} + +static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) +{ + bool success; + u64 old = *pold; + asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]" + CC_SET(z) + : CC_OUT(z) (success), + [ptr] "+m" (*ptr), + "+A" (old) + : "b" ((u32)new), + "c" ((u32)(new >> 32)) + : "memory"); + + if (unlikely(!success)) + *pold = old; + return success; +} + +#ifndef CONFIG_X86_CMPXCHG64 +/* + * Building a kernel capable running on 80386 and 80486. It may be necessary + * to simulate the cmpxchg8b on the 80386 and 80486 CPU. + */ + +#define arch_cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io(LOCK_PREFIX_HERE \ + "call cmpxchg8b_emu", \ + "lock; cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) + + +#define arch_cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io("call cmpxchg8b_emu", \ + "cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) + +#endif + +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) + +#endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h new file mode 100644 index 000000000..250187ac8 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CMPXCHG_64_H +#define _ASM_X86_CMPXCHG_64_H + +static inline void set_64bit(volatile u64 *ptr, u64 val) +{ + *ptr = val; +} + +#define arch_cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg((ptr), (o), (n)); \ +}) + +#define arch_cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_local((ptr), (o), (n)); \ +}) + +#define arch_try_cmpxchg64(ptr, po, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg((ptr), (po), (n)); \ +}) + +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) + +#endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 000000000..3d98c3a60 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +#include + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_HYPERV, + CC_VENDOR_INTEL, +}; + +void cc_set_vendor(enum cc_vendor v); +void cc_set_mask(u64 mask); + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +#else +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +#endif + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h new file mode 100644 index 000000000..b1221da47 --- /dev/null +++ b/arch/x86/include/asm/compat.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COMPAT_H +#define _ASM_X86_COMPAT_H + +/* + * Architecture specific compatibility types + */ +#include +#include +#include +#include +#include +#include + +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + +#define __compat_uid_t __compat_uid_t +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; + +#define compat_dev_t compat_dev_t +typedef u16 compat_dev_t; + +#define compat_ipc_pid_t compat_ipc_pid_t +typedef u16 compat_ipc_pid_t; + +#define compat_statfs compat_statfs + +#include + +#define COMPAT_UTS_MACHINE "i686\0\0" + +typedef u16 compat_nlink_t; + +struct compat_stat { + u32 st_dev; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid_t st_uid; + __compat_gid_t st_gid; + u32 st_rdev; + u32 st_size; + u32 st_blksize; + u32 st_blocks; + u32 st_atime; + u32 st_atime_nsec; + u32 st_mtime; + u32 st_mtime_nsec; + u32 st_ctime; + u32 st_ctime_nsec; + u32 __unused4; + u32 __unused5; +}; + +/* + * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the + * compat flock64 structure. + */ +#define __ARCH_NEED_COMPAT_FLOCK64_PACKED + +struct compat_statfs { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + compat_fsid_t f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_frsize; + int f_flags; + int f_spare[4]; +}; + +#ifdef CONFIG_X86_X32_ABI +#define COMPAT_USE_64BIT_TIME \ + (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) +#endif + +static inline bool in_x32_syscall(void) +{ +#ifdef CONFIG_X86_X32_ABI + if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) + return true; +#endif + return false; +} + +static inline bool in_32bit_syscall(void) +{ + return in_ia32_syscall() || in_x32_syscall(); +} + +#ifdef CONFIG_COMPAT +static inline bool in_compat_syscall(void) +{ + return in_32bit_syscall(); +} +#define in_compat_syscall in_compat_syscall /* override the generic impl */ +#define compat_need_64bit_alignment_fixup in_ia32_syscall +#endif + +struct compat_siginfo; + +#ifdef CONFIG_X86_X32_ABI +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#define copy_siginfo_to_user32 copy_siginfo_to_user32 +#endif /* CONFIG_X86_X32_ABI */ + +#endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h new file mode 100644 index 000000000..37639a2d9 --- /dev/null +++ b/arch/x86/include/asm/cpu.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPU_H +#define _ASM_X86_CPU_H + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_acpi_id(cpu) 0 +#define safe_smp_processor_id() 0 + +#endif /* CONFIG_SMP */ + +struct x86_cpu { + struct cpu cpu; +}; + +#ifdef CONFIG_HOTPLUG_CPU +extern int arch_register_cpu(int num); +extern void arch_unregister_cpu(int); +extern void start_cpu0(void); +#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 +extern int _debug_hotplug_cpu(int cpu, int action); +#endif +#endif + +extern void ap_init_aperfmperf(void); + +int mwait_usable(const struct cpuinfo_x86 *); + +unsigned int x86_family(unsigned int sig); +unsigned int x86_model(unsigned int sig); +unsigned int x86_stepping(unsigned int sig); +#ifdef CONFIG_CPU_SUP_INTEL +extern void __init sld_setup(struct cpuinfo_x86 *c); +extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); +u8 get_this_hybrid_cpu_type(void); +#else +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} +static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + return false; +} + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} + +static inline void handle_bus_lock(struct pt_regs *regs) {} + +static inline u8 get_this_hybrid_cpu_type(void) +{ + return 0; +} +#endif +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif + +extern __noendbr void cet_disable(void); + +struct ucode_cpu_info; + +int intel_cpu_collect_info(struct ucode_cpu_info *uci); + +static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1, + unsigned int s2, unsigned int p2) +{ + if (s1 != s2) + return false; + + /* Processor flags are either both 0 ... */ + if (!p1 && !p2) + return true; + + /* ... or they intersect. */ + return p1 & p2; +} + +extern u64 x86_read_arch_cap_msr(void); + +extern struct cpumask cpus_stop_mask; + +#endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h new file mode 100644 index 000000000..eb8fcede9 --- /dev/null +++ b/arch/x86/include/asm/cpu_device_id.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPU_DEVICE_ID +#define _ASM_X86_CPU_DEVICE_ID + +/* + * Declare drivers belonging to specific x86 CPUs + * Similar in spirit to pci_device_id and related PCI functions + * + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. + */ +#include +/* Get the INTEL_FAM* model defines */ +#include +/* And the X86_VENDOR_* ones */ +#include + +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) +/** + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + +/** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ + X86_STEPPING_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ + X86_MODEL_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ + X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set of wildcards. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) + +/** + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model + * @model: The model name without the INTEL_FAM6_ prefix or ANY + * The model name is expanded to INTEL_FAM6_@model internally + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The vendor is set to INTEL, the family to 6 and all other missing + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. + * + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information. + */ +#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) + +#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + steppings, X86_FEATURE_ANY, data) + +/* + * Match specific microcode revisions. + * + * vendor/family/model/stepping must be all set. + * + * Only checks against the boot CPU. When mixed-stepping configs are + * valid for a CPU model, add a quirk for every valid stepping and + * do the fine-tuning in the quirk handler. + */ + +struct x86_cpu_desc { + u8 x86_family; + u8 x86_vendor; + u8 x86_model; + u8 x86_stepping; + u32 x86_microcode_rev; +}; + +#define INTEL_CPU_DESC(model, stepping, revision) { \ + .x86_family = 6, \ + .x86_vendor = X86_VENDOR_INTEL, \ + .x86_model = (model), \ + .x86_stepping = (stepping), \ + .x86_microcode_rev = (revision), \ +} + +extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); +extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table); + +#endif /* _ASM_X86_CPU_DEVICE_ID */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000..75efc4c6f --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_AMD_MEM_ENCRYPT +#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ +#else +#define VC_EXCEPTION_STKSZ 0 +#endif + +/* Macro to enforce the same ordering and stack sizes */ +#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ + char DF_stack_guard[guardsize]; \ + char DF_stack[EXCEPTION_STKSZ]; \ + char NMI_stack_guard[guardsize]; \ + char NMI_stack[EXCEPTION_STKSZ]; \ + char DB_stack_guard[guardsize]; \ + char DB_stack[EXCEPTION_STKSZ]; \ + char MCE_stack_guard[guardsize]; \ + char MCE_stack[EXCEPTION_STKSZ]; \ + char VC_stack_guard[guardsize]; \ + char VC_stack[optional_stack_size]; \ + char VC2_stack_guard[guardsize]; \ + char VC2_stack[optional_stack_size]; \ + char IST_top_guard[guardsize]; \ + +/* The exception stacks' physical storage. No guard pages required */ +struct exception_stacks { + ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) +}; + +/* The effective cpu entry area mapping with guard pages. */ +struct cea_exception_stacks { + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) +}; + +/* + * The exception stack ordering in [cea_]exception_stacks + */ +enum exception_stack_ordering { + ESTACK_DF, + ESTACK_NMI, + ESTACK_DB, + ESTACK_MCE, + ESTACK_VC, + ESTACK_VC2, + N_EXCEPTION_STACKS +}; + +#define CEA_ESTACK_SIZE(st) \ + sizeof(((struct cea_exception_stacks *)0)->st## _stack) + +#define CEA_ESTACK_BOT(ceastp, st) \ + ((unsigned long)&(ceastp)->st## _stack) + +#define CEA_ESTACK_TOP(ceastp, st) \ + (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) + +#define CEA_ESTACK_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack) + +#define CEA_ESTACK_PAGES \ + (sizeof(struct cea_exception_stacks) / PAGE_SIZE) + +#endif + +#ifdef CONFIG_X86_32 +struct doublefault_stack { + unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)]; + struct x86_hw_tss tss; +} __aligned(PAGE_SIZE); +#endif + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. + */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. + */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif + struct entry_stack_page entry_stack_page; + +#ifdef CONFIG_X86_32 + char guard_doublefault_stack[PAGE_SIZE]; + struct doublefault_stack doublefault_stack; +#endif + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries with guard pages. + */ + struct cea_exception_stacks estacks; +#endif + /* + * Per CPU debug store for Intel performance monitoring. Wastes a + * full page at the moment. + */ + struct debug_store cpu_debug_store; + /* + * The actual PEBS/BTS buffers must be mapped to user space + * Reserve enough fixmap PTEs. + */ + struct debug_store_buffers cpu_debug_buffers; +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); + +extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static __always_inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} + +#define __this_cpu_ist_top_va(name) \ + CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) + +#define __this_cpu_ist_bottom_va(name) \ + CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h new file mode 100644 index 000000000..ce0c8f7d3 --- /dev/null +++ b/arch/x86/include/asm/cpufeature.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUFEATURE_H +#define _ASM_X86_CPUFEATURE_H + +#include + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) + +#include +#include +#include + +enum cpuid_leafs +{ + CPUID_1_EDX = 0, + CPUID_8000_0001_EDX, + CPUID_8086_0001_EDX, + CPUID_LNX_1, + CPUID_1_ECX, + CPUID_C000_0001_EDX, + CPUID_8000_0001_ECX, + CPUID_LNX_2, + CPUID_LNX_3, + CPUID_7_0_EBX, + CPUID_D_1_EAX, + CPUID_LNX_4, + CPUID_7_1_EAX, + CPUID_8000_0008_EBX, + CPUID_6_EAX, + CPUID_8000_000A_EDX, + CPUID_7_ECX, + CPUID_8000_0007_EBX, + CPUID_7_EDX, + CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, +}; + +#define X86_CAP_FMT_NUM "%d:%d" +#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) + +#ifdef CONFIG_X86_FEATURE_NAMES +extern const char * const x86_cap_flags[NCAPINTS*32]; +extern const char * const x86_power_flags[32]; +#define X86_CAP_FMT "%s" +#define x86_cap_flag(flag) x86_cap_flags[flag] +#else +#define X86_CAP_FMT X86_CAP_FMT_NUM +#define x86_cap_flag x86_cap_flag_num +#endif + +/* + * In order to save room, we index into this array by doing + * X86_BUG_ - NCAPINTS*32. + */ +extern const char * const x86_bug_flags[NBUGINTS*32]; + +#define test_cpu_cap(c, bit) \ + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) + +/* + * There are 32 bits/features in each mask word. The high bits + * (selected with (bit>>5) give us the word number and the low 5 + * bits give us the bit/feature number inside the word. + * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can + * see if it is set in the mask word. + */ +#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ + (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) + +/* + * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the + * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all + * header macros which use NCAPINTS need to be changed. The duplicated macro + * use causes the compiler to issue errors for all headers so that all usage + * sites can be corrected. + */ +#define REQUIRED_MASK_BIT_SET(feature_bit) \ + ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + REQUIRED_MASK_CHECK || \ + BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + +#define DISABLED_MASK_BIT_SET(feature_bit) \ + ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + DISABLED_MASK_CHECK || \ + BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + +#define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + test_cpu_cap(c, bit)) + +#define this_cpu_has(bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, \ + (unsigned long __percpu *)&cpu_info.x86_capability)) + +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) + +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) + +extern void setup_clear_cpu_cap(unsigned int bit); +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); + +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ +} while (0) + +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + +/* + * Static testing of CPU features. Used the same as boot_cpu_has(). It + * statically patches the target code for additional performance. Use + * static_cpu_has() only in fast paths, where every cycle counts. Which + * means that the boot_cpu_has() variant is already fast enough for the + * majority of cases and you should stick to using it as it is generally + * only two instructions: a RIP-relative MOV and a TEST. + * + * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know + * that this is only used on a fallback path and will sometimes cause + * it to manifest the address of boot_cpu_data in a register, fouling + * the mainline (post-initialization) code. + */ +static __always_inline bool _static_cpu_has(u16 bit) +{ + asm_volatile_goto( + ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") + ".pushsection .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".popsection\n" + : : [feature] "i" (bit), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); +t_yes: + return true; +t_no: + return false; +} + +#define static_cpu_has(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + _static_cpu_has(bit) \ +) + +#define cpu_has_bug(c, bit) cpu_has(c, (bit)) +#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) +#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) + +#define static_cpu_has_bug(bit) static_cpu_has((bit)) +#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) + +#define MAX_CPU_FEATURES (NCAPINTS * 32) +#define cpu_have_feature boot_cpu_has + +#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ + boot_cpu_data.x86_model + +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h new file mode 100644 index 000000000..b12270879 --- /dev/null +++ b/arch/x86/include/asm/cpufeatures.h @@ -0,0 +1,480 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUFEATURES_H +#define _ASM_X86_CPUFEATURES_H + +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#include +#endif + +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include +#endif + +/* + * Defines x86 CPU feature bits + */ +#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NBUGINTS 2 /* N 32-bit bug flags */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + * + * When adding new features here that depend on other features, + * please update the table in kernel/cpu/cpuid-deps.c as well. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ + +/* CPU types for specific tunings: */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ + +/* Virtualization flags: Linux defined, word 8 */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ + +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ + +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ + +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ + +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ + +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ + +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + +/* + * BUG word(s) + */ +#define X86_BUG(x) (NCAPINTS*32 + (x)) + +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#ifdef CONFIG_X86_32 +/* + * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional + * to avoid confusion. + */ +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ + +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h new file mode 100644 index 000000000..70b2db181 --- /dev/null +++ b/arch/x86/include/asm/cpuid.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CPUID-related helpers/definitions + * + * Derived from arch/x86/kvm/cpuid.c + */ + +#ifndef _ASM_X86_CPUID_H +#define _ASM_X86_CPUID_H + +static __always_inline bool cpuid_function_is_indexed(u32 function) +{ + switch (function) { + case 4: + case 7: + case 0xb: + case 0xd: + case 0xf: + case 0x10: + case 0x12: + case 0x14: + case 0x17: + case 0x18: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x8000001d: + return true; + } + + return false; +} + +#endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000..c8b39c671 --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); + +#endif diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 000000000..c5aed9e92 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include + +extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; + +extern void setup_cpu_local_masks(void); + +/* + * NMI and MCE exceptions need cpu_is_offline() _really_ early, + * provide an arch_ special for them to avoid instrumentation. + */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + return; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h new file mode 100644 index 000000000..8b6bd6353 --- /dev/null +++ b/arch/x86/include/asm/crash.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CRASH_H +#define _ASM_X86_CRASH_H + +struct kimage; + +int crash_load_segments(struct kimage *image); +int crash_setup_memmap_entries(struct kimage *image, + struct boot_params *params); +void crash_smp_send_stop(void); + +#endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h new file mode 100644 index 000000000..3e204e614 --- /dev/null +++ b/arch/x86/include/asm/current.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CURRENT_H +#define _ASM_X86_CURRENT_H + +#include +#include + +#ifndef __ASSEMBLY__ +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); + +static __always_inline struct task_struct *get_current(void) +{ + return this_cpu_read_stable(current_task); +} + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h new file mode 100644 index 000000000..9ed8343c9 --- /dev/null +++ b/arch/x86/include/asm/debugreg.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DEBUGREG_H +#define _ASM_X86_DEBUGREG_H + + +#include +#include + +DECLARE_PER_CPU(unsigned long, cpu_dr7); + +#ifndef CONFIG_PARAVIRT_XXL +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) +#endif + +static __always_inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("mov %%db0, %0" :"=r" (val)); + break; + case 1: + asm("mov %%db1, %0" :"=r" (val)); + break; + case 2: + asm("mov %%db2, %0" :"=r" (val)); + break; + case 3: + asm("mov %%db3, %0" :"=r" (val)); + break; + case 6: + asm("mov %%db6, %0" :"=r" (val)); + break; + case 7: + /* + * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them + * with other code. + * + * This is needed because a DR7 access can cause a #VC exception + * when running under SEV-ES. Taking a #VC exception is not a + * safe thing to do just anywhere in the entry code and + * re-ordering might place the access into an unsafe location. + * + * This happened in the NMI handler, where the DR7 read was + * re-ordered to happen before the call to sev_es_ist_enter(), + * causing stack recursion. + */ + asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER); + break; + default: + BUG(); + } + return val; +} + +static __always_inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("mov %0, %%db0" ::"r" (value)); + break; + case 1: + asm("mov %0, %%db1" ::"r" (value)); + break; + case 2: + asm("mov %0, %%db2" ::"r" (value)); + break; + case 3: + asm("mov %0, %%db3" ::"r" (value)); + break; + case 6: + asm("mov %0, %%db6" ::"r" (value)); + break; + case 7: + /* + * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them + * with other code. + * + * While is didn't happen with a DR7 write (see the DR7 read + * comment above which explains where it happened), add the + * __FORCE_ORDER here too to avoid similar problems in the + * future. + */ + asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER); + break; + default: + BUG(); + } +} + +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +static __always_inline bool hw_breakpoint_active(void) +{ + return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; +} + +extern void hw_breakpoint_restore(void); + +static __always_inline unsigned long local_db_save(void) +{ + unsigned long dr7; + + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + + get_debugreg(dr7, 7); + dr7 &= ~0x400; /* architecturally set bit */ + if (dr7) + set_debugreg(0, 7); + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + return dr7; +} + +static __always_inline void local_db_restore(unsigned long dr7) +{ + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + if (dr7) + set_debugreg(dr7, 7); +} + +#ifdef CONFIG_CPU_SUP_AMD +extern void set_dr_addr_mask(unsigned long mask, int dr); +#else +static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +#endif + +#endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h new file mode 100644 index 000000000..630891d25 --- /dev/null +++ b/arch/x86/include/asm/delay.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DELAY_H +#define _ASM_X86_DELAY_H + +#include +#include + +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); +void use_mwaitx_delay(void); + +#endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h new file mode 100644 index 000000000..ab97b22ac --- /dev/null +++ b/arch/x86/include/asm/desc.h @@ -0,0 +1,451 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DESC_H +#define _ASM_X86_DESC_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + + desc->base0 = (info->base_addr & 0x0000ffff); + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + /* Set the ACCESS bit so it can be mapped RO */ + desc->type |= 1; + + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit1 = (info->limit & 0xf0000) >> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + + desc->base2 = (info->base_addr & 0xff000000) >> 24; + /* + * Don't allow setting of the lm bit. It would confuse + * user_64bit_mode and would get overridden by sysret anyway. + */ + desc->l = 0; +} + +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); + +/* Provide the original GDT */ +static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + +/* Provide the current original GDT */ +static inline struct desc_struct *get_current_gdt_rw(void) +{ + return this_cpu_ptr(&gdt_page)->gdt; +} + +/* Provide the fixmap address of the remapped GDT */ +static inline struct desc_struct *get_cpu_gdt_ro(int cpu) +{ + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; +} + +/* Provide the current read-only GDT */ +static inline struct desc_struct *get_current_gdt_ro(void) +{ + return get_cpu_gdt_ro(smp_processor_id()); +} + +/* Provide the physical address of the GDT page. */ +static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu) +{ + return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu)); +} + +static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate->offset_low = (u16) func; + gate->bits.p = 1; + gate->bits.dpl = dpl; + gate->bits.zero = 0; + gate->bits.type = type; + gate->offset_middle = (u16) (func >> 16); +#ifdef CONFIG_X86_64 + gate->segment = __KERNEL_CS; + gate->bits.ist = ist; + gate->reserved = 0; + gate->offset_high = (u32) (func >> 32); +#else + gate->segment = seg; + gate->bits.ist = 0; +#endif +} + +static inline int desc_empty(const void *ptr) +{ + const u32 *desc = ptr; + + return !(desc[0] | desc[1]); +} + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_tr(tr) (tr = native_store_tr()) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt + +#define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ +} +#endif /* CONFIG_PARAVIRT_XXL */ + +#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) + +static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) +{ + memcpy(&idt[entry], gate, sizeof(*gate)); +} + +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) +{ + memcpy(&ldt[entry], desc, 8); +} + +static inline void +native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type) +{ + unsigned int size; + + switch (type) { + case DESC_TSS: size = sizeof(tss_desc); break; + case DESC_LDT: size = sizeof(ldt_desc); break; + default: size = sizeof(*gdt); break; + } + + memcpy(&gdt[entry], desc, size); +} + +static inline void set_tssldt_descriptor(void *d, unsigned long addr, + unsigned type, unsigned size) +{ + struct ldttss_desc *desc = d; + + memset(desc, 0, sizeof(*desc)); + + desc->limit0 = (u16) size; + desc->base0 = (u16) addr; + desc->base1 = (addr >> 16) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (addr >> 24) & 0xFF; +#ifdef CONFIG_X86_64 + desc->base3 = (u32) (addr >> 32); +#endif +} + +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) +{ + struct desc_struct *d = get_cpu_gdt_rw(cpu); + tss_desc tss; + + set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, + __KERNEL_TSS_LIMIT); + write_gdt_entry(d, entry, &tss, DESC_TSS); +} + +#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) + +static inline void native_set_ldt(const void *addr, unsigned int entries) +{ + if (likely(entries == 0)) + asm volatile("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + ldt_desc ldt; + + set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT, + entries * LDT_ENTRY_SIZE - 1); + write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT, + &ldt, DESC_LDT); + asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } +} + +static inline void native_load_gdt(const struct desc_ptr *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static __always_inline void native_load_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct desc_ptr *dtr) +{ + asm volatile("sgdt %0":"=m" (*dtr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm volatile("sidt %0":"=m" (*dtr)); +} + +static inline void native_gdt_invalidate(void) +{ + const struct desc_ptr invalid_gdt = { + .address = 0, + .size = 0 + }; + + native_load_gdt(&invalid_gdt); +} + +static inline void native_idt_invalidate(void) +{ + const struct desc_ptr invalid_idt = { + .address = 0, + .size = 0 + }; + + native_load_idt(&invalid_idt); +} + +/* + * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is + * a read-only remapping. To prevent a page fault, the GDT is switched to the + * original writeable version when needed. + */ +#ifdef CONFIG_X86_64 +static inline void native_load_tr_desc(void) +{ + struct desc_ptr gdt; + int cpu = raw_smp_processor_id(); + bool restore = 0; + struct desc_struct *fixmap_gdt; + + native_store_gdt(&gdt); + fixmap_gdt = get_cpu_gdt_ro(cpu); + + /* + * If the current GDT is the read-only fixmap, swap to the original + * writeable version. Swap back at the end. + */ + if (gdt.address == (unsigned long)fixmap_gdt) { + load_direct_gdt(cpu); + restore = 1; + } + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); + if (restore) + load_fixmap_gdt(cpu); +} +#else +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} +#endif + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + + asm volatile("str %0":"=r" (tr)); + + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + struct desc_struct *gdt = get_cpu_gdt_rw(cpu); + unsigned int i; + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + +DECLARE_PER_CPU(bool, __tss_limit_invalid); + +static inline void force_reload_TR(void) +{ + struct desc_struct *d = get_current_gdt_rw(); + tss_desc tss; + + memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc)); + + /* + * LTR requires an available TSS, and the TSS is currently + * busy. Make it be available so that LTR will work. + */ + tss.type = DESC_TSS; + write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); + + load_TR_desc(); + this_cpu_write(__tss_limit_invalid, false); +} + +/* + * Call this if you need the TSS limit to be correct, which should be the case + * if and only if you have TIF_IO_BITMAP set or you're switching to a task + * with TIF_IO_BITMAP set. + */ +static inline void refresh_tss_limit(void) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); + + if (unlikely(this_cpu_read(__tss_limit_invalid))) + force_reload_TR(); +} + +/* + * If you do something evil that corrupts the cached TSS limit (I'm looking + * at you, VMX exits), call this function. + * + * The optimization here is that the TSS limit only matters for Linux if the + * IO bitmap is in use. If the TSS limit gets forced to its minimum value, + * everything works except that IO bitmap will be ignored and all CPL 3 IO + * instructions will #GP, which is exactly what we want for normal tasks. + */ +static inline void invalidate_tss_limit(void) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); + + if (unlikely(test_thread_flag(TIF_IO_BITMAP))) + force_reload_TR(); + else + this_cpu_write(__tss_limit_invalid, true); +} + +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ + ((info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0) + +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} + +static inline void clear_LDT(void) +{ + set_ldt(NULL, 0); +} + +static inline unsigned long get_desc_base(const struct desc_struct *desc) +{ + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline void set_desc_base(struct desc_struct *desc, unsigned long base) +{ + desc->base0 = base & 0xffff; + desc->base1 = (base >> 16) & 0xff; + desc->base2 = (base >> 24) & 0xff; +} + +static inline unsigned long get_desc_limit(const struct desc_struct *desc) +{ + return desc->limit0 | (desc->limit1 << 16); +} + +static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) +{ + desc->limit0 = limit & 0xffff; + desc->limit1 = (limit >> 16) & 0xf; +} + +void alloc_intr_gate(unsigned int n, const void *addr); + +static inline void init_idt_data(struct idt_data *data, unsigned int n, + const void *addr) +{ + BUG_ON(n > 0xFF); + + memset(data, 0, sizeof(*data)); + data->vector = n; + data->addr = addr; + data->segment = __KERNEL_CS; + data->bits.type = GATE_INTERRUPT; + data->bits.p = 1; +} + +static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) +{ + unsigned long addr = (unsigned long) d->addr; + + gate->offset_low = (u16) addr; + gate->segment = (u16) d->segment; + gate->bits = d->bits; + gate->offset_middle = (u16) (addr >> 16); +#ifdef CONFIG_X86_64 + gate->offset_high = (u32) (addr >> 32); + gate->reserved = 0; +#endif +} + +extern unsigned long system_vectors[]; + +extern void load_current_idt(void); +extern void idt_setup_early_handler(void); +extern void idt_setup_early_traps(void); +extern void idt_setup_traps(void); +extern void idt_setup_apic_and_irq_gates(void); +extern bool idt_is_f00f_address(unsigned long address); + +#ifdef CONFIG_X86_64 +extern void idt_setup_early_pf(void); +#else +static inline void idt_setup_early_pf(void) { } +#endif + +extern void idt_invalidate(void); + +#endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h new file mode 100644 index 000000000..f7e7099af --- /dev/null +++ b/arch/x86/include/asm/desc_defs.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Written 2000 by Andi Kleen */ +#ifndef _ASM_X86_DESC_DEFS_H +#define _ASM_X86_DESC_DEFS_H + +/* + * Segment descriptor structure definitions, usable from both x86_64 and i386 + * archs. + */ + +#ifndef __ASSEMBLY__ + +#include + +/* 8 byte segment descriptor */ +struct desc_struct { + u16 limit0; + u16 base0; + u16 base1: 8, type: 4, s: 1, dpl: 2, p: 1; + u16 limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; +} __attribute__((packed)); + +#define GDT_ENTRY_INIT(flags, base, limit) \ + { \ + .limit0 = (u16) (limit), \ + .limit1 = ((limit) >> 16) & 0x0F, \ + .base0 = (u16) (base), \ + .base1 = ((base) >> 16) & 0xFF, \ + .base2 = ((base) >> 24) & 0xFF, \ + .type = (flags & 0x0f), \ + .s = (flags >> 4) & 0x01, \ + .dpl = (flags >> 5) & 0x03, \ + .p = (flags >> 7) & 0x01, \ + .avl = (flags >> 12) & 0x01, \ + .l = (flags >> 13) & 0x01, \ + .d = (flags >> 14) & 0x01, \ + .g = (flags >> 15) & 0x01, \ + } + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, + GATE_TASK = 0x5, +}; + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, + DESCTYPE_S = 0x10, /* !system */ +}; + +/* LDT or TSS descriptor in the GDT. */ +struct ldttss_desc { + u16 limit0; + u16 base0; + + u16 base1 : 8, type : 5, dpl : 2, p : 1; + u16 limit1 : 4, zero0 : 3, g : 1, base2 : 8; +#ifdef CONFIG_X86_64 + u32 base3; + u32 zero1; +#endif +} __attribute__((packed)); + +typedef struct ldttss_desc ldt_desc; +typedef struct ldttss_desc tss_desc; + +struct idt_bits { + u16 ist : 3, + zero : 5, + type : 5, + dpl : 2, + p : 1; +} __attribute__((packed)); + +struct idt_data { + unsigned int vector; + unsigned int segment; + struct idt_bits bits; + const void *addr; +}; + +struct gate_struct { + u16 offset_low; + u16 segment; + struct idt_bits bits; + u16 offset_middle; +#ifdef CONFIG_X86_64 + u32 offset_high; + u32 reserved; +#endif +} __attribute__((packed)); + +typedef struct gate_struct gate_desc; + +static inline unsigned long gate_offset(const gate_desc *g) +{ +#ifdef CONFIG_X86_64 + return g->offset_low | ((unsigned long)g->offset_middle << 16) | + ((unsigned long) g->offset_high << 32); +#else + return g->offset_low | ((unsigned long)g->offset_middle << 16); +#endif +} + +static inline unsigned long gate_segment(const gate_desc *g) +{ + return g->segment; +} + +struct desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)) ; + +#endif /* !__ASSEMBLY__ */ + +/* Boot IDT definitions */ +#define BOOT_IDT_ENTRIES 32 + +/* Access rights as returned by LAR */ +#define AR_TYPE_RODATA (0 * (1 << 9)) +#define AR_TYPE_RWDATA (1 * (1 << 9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9)) +#define AR_TYPE_XOCODE (4 * (1 << 9)) +#define AR_TYPE_XRCODE (5 * (1 << 9)) +#define AR_TYPE_XOCODE_CONF (6 * (1 << 9)) +#define AR_TYPE_XRCODE_CONF (7 * (1 << 9)) +#define AR_TYPE_MASK (7 * (1 << 9)) + +#define AR_DPL0 (0 * (1 << 13)) +#define AR_DPL3 (3 * (1 << 13)) +#define AR_DPL_MASK (3 * (1 << 13)) + +#define AR_A (1 << 8) /* "Accessed" */ +#define AR_S (1 << 12) /* If clear, "System" segment */ +#define AR_P (1 << 15) /* "Present" */ +#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */ +#define AR_L (1 << 21) /* "Long mode" for code segments */ +#define AR_DB (1 << 22) /* D/B, effect depends on type */ +#define AR_G (1 << 23) /* "Granularity" (limit in pages) */ + +#endif /* _ASM_X86_DESC_DEFS_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h new file mode 100644 index 000000000..7c0a52ca2 --- /dev/null +++ b/arch/x86/include/asm/device.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DEVICE_H +#define _ASM_X86_DEVICE_H + +struct dev_archdata { +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_X86_DEVICE_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h new file mode 100644 index 000000000..000037078 --- /dev/null +++ b/arch/x86/include/asm/disabled-features.h @@ -0,0 +1,117 @@ +#ifndef _ASM_X86_DISABLED_FEATURES_H +#define _ASM_X86_DISABLED_FEATURES_H + +/* These features, although they might be available in a CPU + * will not be used because the compile options to support + * them are not present. + * + * This code allows them to be checked and disabled at + * compile time without an explicit #ifdef. Use + * cpu_feature_enabled(). + */ + +#ifdef CONFIG_X86_UMIP +# define DISABLE_UMIP 0 +#else +# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) +#endif + +#ifdef CONFIG_X86_64 +# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) +# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) +# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) +# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 +#else +# define DISABLE_VME 0 +# define DISABLE_K6_MTRR 0 +# define DISABLE_CYRIX_ARR 0 +# define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +# define DISABLE_PKU 0 +# define DISABLE_OSPKE 0 +#else +# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) +# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) +#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ + +#ifdef CONFIG_X86_5LEVEL +# define DISABLE_LA57 0 +#else +# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#endif + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif + +#ifdef CONFIG_X86_SGX +# define DISABLE_SGX 0 +#else +# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) +#endif + +#ifdef CONFIG_INTEL_TDX_GUEST +# define DISABLE_TDX_GUEST 0 +#else +# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) +#endif + +/* + * Make sure to add features to the correct mask + */ +#define DISABLED_MASK0 (DISABLE_VME) +#define DISABLED_MASK1 0 +#define DISABLED_MASK2 0 +#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) +#define DISABLED_MASK4 (DISABLE_PCID) +#define DISABLED_MASK5 0 +#define DISABLED_MASK6 0 +#define DISABLED_MASK7 (DISABLE_PTI) +#define DISABLED_MASK8 (DISABLE_TDX_GUEST) +#define DISABLED_MASK9 (DISABLE_SGX) +#define DISABLED_MASK10 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) +#define DISABLED_MASK12 0 +#define DISABLED_MASK13 0 +#define DISABLED_MASK14 0 +#define DISABLED_MASK15 0 +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) +#define DISABLED_MASK17 0 +#define DISABLED_MASK18 0 +#define DISABLED_MASK19 0 +#define DISABLED_MASK20 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) + +#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h new file mode 100644 index 000000000..b8f1dc076 --- /dev/null +++ b/arch/x86/include/asm/div64.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DIV64_H +#define _ASM_X86_DIV64_H + +#ifdef CONFIG_X86_32 + +#include +#include + +/* + * do_div() is NOT a C function. It wants to return + * two values (the quotient and the remainder), but + * since that doesn't work very well in C, what it + * does is: + * + * - modifies the 64-bit dividend _in_place_ + * - returns the 32-bit remainder + * + * This ends up being the most efficient "calling + * convention" on x86. + */ +#define do_div(n, base) \ +({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ + __mod = n & (__base - 1); \ + n >>= ilog2(__base); \ + } else { \ + asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + asm("divl %2" : "=a" (__low), "=d" (__mod) \ + : "rm" (__base), "0" (__low), "1" (__upper)); \ + asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ + } \ + __mod; \ +}) + +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + union { + u64 v64; + u32 v32[2]; + } d = { dividend }; + u32 upper; + + upper = d.v32[1]; + d.v32[1] = 0; + if (upper >= divisor) { + d.v32[1] = upper / divisor; + upper %= divisor; + } + asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) : + "rm" (divisor), "0" (d.v32[0]), "1" (upper)); + return d.v64; +} +#define div_u64_rem div_u64_rem + +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + +#else +# include + +/* + * Will generate an #DE when the result doesn't fit u64, could fix with an + * __ex_table[] entry when it becomes an issue. + */ +static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) +{ + u64 q; + + asm ("mulq %2; divq %3" : "=a" (q) + : "a" (a), "rm" (mul), "rm" (div) + : "rdx"); + + return q; +} +#define mul_u64_u64_div_u64 mul_u64_u64_div_u64 + +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +{ + return mul_u64_u64_div_u64(a, mul, div); +} +#define mul_u64_u32_div mul_u64_u32_div + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h new file mode 100644 index 000000000..1c66708e3 --- /dev/null +++ b/arch/x86/include/asm/dma-mapping.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DMA_MAPPING_H +#define _ASM_X86_DMA_MAPPING_H + +extern const struct dma_map_ops *dma_ops; + +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +{ + return dma_ops; +} + +#endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h new file mode 100644 index 000000000..8ae6e0e11 --- /dev/null +++ b/arch/x86/include/asm/dma.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_X86_DMA_H +#define _ASM_X86_DMA_H + +#include /* And spinlocks */ +#include /* need byte IO */ + +#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER +#define dma_outb outb_p +#else +#define dma_outb outb +#endif + +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) + +#ifdef CONFIG_X86_32 +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) +#else +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) +#endif + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +/* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_READ 0x44 +/* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 +/* pass thru DREQ->HRQ, DACK<-HLDA only */ +#define DMA_MODE_CASCADE 0xC0 + +#define DMA_AUTOINIT 0x10 + + +#ifdef CONFIG_ISA_DMA_API +extern spinlock_t dma_spin_lock; + +static inline unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static inline void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} +#endif /* CONFIG_ISA_DMA_API */ + +/* enable/disable a specific DMA channel */ +static inline void enable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static inline void disable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! --- + */ +static inline void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr <= 3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr & 3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static inline void set_dma_page(unsigned int dmanr, char pagenr) +{ + switch (dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static inline void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + set_dma_page(dmanr, a>>16); + if (dmanr <= 3) { + dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + } else { + dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + } +} + + +/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + } else { + dma_outb((count >> 1) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static inline int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port; + /* using short to get 16-bit wrap around */ + unsigned short count; + + io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE + : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3) ? count : (count << 1); +} + + +/* These are in kernel/dma.c because x86 uses CONFIG_GENERIC_ISA_DMA */ +#ifdef CONFIG_ISA_DMA_API +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); +#endif + +#endif /* _ASM_X86_DMA_H */ diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h new file mode 100644 index 000000000..b825cb201 --- /dev/null +++ b/arch/x86/include/asm/dmi.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DMI_H +#define _ASM_X86_DMI_H + +#include +#include +#include + +#include + +static __always_inline __init void *dmi_alloc(unsigned len) +{ + return extend_brk(len, sizeof(int)); +} + +/* Use early IO mappings for DMI because it's initialized early */ +#define dmi_early_remap early_memremap +#define dmi_early_unmap early_memunmap +#define dmi_remap(_x, _l) memremap(_x, _l, MEMREMAP_WB) +#define dmi_unmap(_x) memunmap(_x) + +#endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h new file mode 100644 index 000000000..54a6e4a2e --- /dev/null +++ b/arch/x86/include/asm/doublefault.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DOUBLEFAULT_H +#define _ASM_X86_DOUBLEFAULT_H + +#ifdef CONFIG_X86_32 +extern void doublefault_init_cpu_tss(void); +#else +static inline void doublefault_init_cpu_tss(void) +{ +} +#endif + +#endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h new file mode 100644 index 000000000..430fca13b --- /dev/null +++ b/arch/x86/include/asm/dwarf2.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DWARF2_H +#define _ASM_X86_DWARF2_H + +#ifndef __ASSEMBLY__ +#warning "asm/dwarf2.h should be only included in pure assembly files" +#endif + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined +#define CFI_ESCAPE .cfi_escape + +#ifndef BUILD_VDSO + /* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * The latter we currently just discard since we don't do DWARF + * unwinding at runtime. So only the offline DWARF information is + * useful to anyone. Note we should not use this directive if we + * ever decide to enable DWARF unwinding at runtime. + */ + .cfi_sections .debug_frame +#else + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ + .cfi_sections .eh_frame, .debug_frame +#endif + +#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h new file mode 100644 index 000000000..e8f58ddd0 --- /dev/null +++ b/arch/x86/include/asm/e820/api.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_E820_API_H +#define _ASM_E820_API_H + +#include + +extern struct e820_table *e820_table; +extern struct e820_table *e820_table_kexec; +extern struct e820_table *e820_table_firmware; + +extern unsigned long pci_mem_start; + +extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type); +extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); +extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); + +extern void e820__range_add (u64 start, u64 size, enum e820_type type); +extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); +extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); + +extern void e820__print_table(char *who); +extern int e820__update_table(struct e820_table *table); +extern void e820__update_table_print(void); + +extern unsigned long e820__end_of_ram_pfn(void); +extern unsigned long e820__end_of_low_ram_pfn(void); + +extern u64 e820__memblock_alloc_reserved(u64 size, u64 align); +extern void e820__memblock_setup(void); + +extern void e820__reserve_setup_data(void); +extern void e820__finish_early_params(void); +extern void e820__reserve_resources(void); +extern void e820__reserve_resources_late(void); + +extern void e820__memory_setup(void); +extern void e820__memory_setup_extended(u64 phys_addr, u32 data_len); +extern char *e820__memory_setup_default(void); +extern void e820__setup_pci_gap(void); + +extern void e820__reallocate_tables(void); +extern void e820__register_nosave_regions(unsigned long limit_pfn); + +extern int e820__get_entry_type(u64 start, u64 end); + +/* + * Returns true iff the specified range [start,end) is completely contained inside + * the ISA region. + */ +static inline bool is_ISA_range(u64 start, u64 end) +{ + return start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS; +} + +#endif /* _ASM_E820_API_H */ diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h new file mode 100644 index 000000000..314f75d88 --- /dev/null +++ b/arch/x86/include/asm/e820/types.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_E820_TYPES_H +#define _ASM_E820_TYPES_H + +#include + +/* + * These are the E820 types known to the kernel: + */ +enum e820_type { + E820_TYPE_RAM = 1, + E820_TYPE_RESERVED = 2, + E820_TYPE_ACPI = 3, + E820_TYPE_NVS = 4, + E820_TYPE_UNUSABLE = 5, + E820_TYPE_PMEM = 7, + + /* + * This is a non-standardized way to represent ADR or + * NVDIMM regions that persist over a reboot. + * + * The kernel will ignore their special capabilities + * unless the CONFIG_X86_PMEM_LEGACY=y option is set. + * + * ( Note that older platforms also used 6 for the same + * type of memory, but newer versions switched to 12 as + * 6 was assigned differently. Some time they will learn... ) + */ + E820_TYPE_PRAM = 12, + + /* + * Special-purpose memory is indicated to the system via the + * EFI_MEMORY_SP attribute. Define an e820 translation of this + * memory type for the purpose of reserving this range and + * marking it with the IORES_DESC_SOFT_RESERVED designation. + */ + E820_TYPE_SOFT_RESERVED = 0xefffffff, + + /* + * Reserved RAM used by the kernel itself if + * CONFIG_INTEL_TXT=y is enabled, memory of this type + * will be included in the S3 integrity calculation + * and so should not include any memory that the BIOS + * might alter over the S3 transition: + */ + E820_TYPE_RESERVED_KERN = 128, +}; + +/* + * A single E820 map entry, describing a memory range of [addr...addr+size-1], + * of 'type' memory type: + * + * (We pack it because there can be thousands of them on large systems.) + */ +struct e820_entry { + u64 addr; + u64 size; + enum e820_type type; +} __attribute__((packed)); + +/* + * The legacy E820 BIOS limits us to 128 (E820_MAX_ENTRIES_ZEROPAGE) nodes + * due to the constrained space in the zeropage. + * + * On large systems we can easily have thousands of nodes with RAM, + * which cannot be fit into so few entries - so we have a mechanism + * to extend the e820 table size at build-time, via the E820_MAX_ENTRIES + * define below. + * + * ( Those extra entries are enumerated via the EFI memory map, not + * via the legacy zeropage mechanism. ) + * + * Size our internal memory map tables to have room for these additional + * entries, based on a heuristic calculation: up to three entries per + * NUMA node, plus E820_MAX_ENTRIES_ZEROPAGE for some extra space. + * + * This allows for bootstrap/firmware quirks such as possible duplicate + * E820 entries that might need room in the same arrays, prior to the + * call to e820__update_table() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +#include + +#define E820_MAX_ENTRIES (E820_MAX_ENTRIES_ZEROPAGE + 3*MAX_NUMNODES) + +/* + * The whole array of E820 entries: + */ +struct e820_table { + __u32 nr_entries; + struct e820_entry entries[E820_MAX_ENTRIES]; +}; + +/* + * Various well-known legacy memory ranges in physical memory: + */ +#define ISA_START_ADDRESS 0x000a0000 +#define ISA_END_ADDRESS 0x00100000 + +#define BIOS_BEGIN 0x000a0000 +#define BIOS_END 0x00100000 + +#define HIGH_MEMORY 0x00100000 + +#define BIOS_ROM_BASE 0xffe00000 +#define BIOS_ROM_END 0xffffffff + +#endif /* _ASM_E820_TYPES_H */ diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h new file mode 100644 index 000000000..426fc53ff --- /dev/null +++ b/arch/x86/include/asm/edac.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EDAC_H +#define _ASM_X86_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static inline void edac_atomic_scrub(void *va, u32 size) +{ + u32 i, *virt_addr = va; + + /* + * Very carefully read and write to memory atomically so we + * are interrupt, DMA and SMP safe. + */ + for (i = 0; i < size / 4; i++, virt_addr++) + asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); +} + +#endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h new file mode 100644 index 000000000..233ae6986 --- /dev/null +++ b/arch/x86/include/asm/efi.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EFI_H +#define _ASM_X86_EFI_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned long efi_fw_vendor, efi_config_table; +extern unsigned long efi_mixed_mode_stack_pa; + +/* + * We map the EFI regions needed for runtime services non-contiguously, + * with preserved alignment on virtual addresses starting from -4G down + * for a total max space of 64G. This way, we provide for stable runtime + * services addresses across kernels so that a kexec'd kernel can still + * use them. + * + * This is the main reason why we're doing stable VA mappings for RT + * services. + */ + +#define EFI32_LOADER_SIGNATURE "EL32" +#define EFI64_LOADER_SIGNATURE "EL64" + +#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF + +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allows us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ + +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(9), __efi_arg_sentinel(8), \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 10; })) +#define __efi_arg_sentinel(n) , n + +/* + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. + */ + +#define __efi_nargs_check(f, n, ...) \ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ +}) + +static inline void efi_fpu_begin(void) +{ + /* + * The UEFI calling convention (UEFI spec 2.3.2 and 2.3.4) requires + * that FCW and MXCSR (64-bit) must be initialized prior to calling + * UEFI code. (Oddly the spec does not require that the FPU stack + * be empty.) + */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + +static inline void efi_fpu_end(void) +{ + kernel_fpu_end(); +} + +#ifdef CONFIG_X86_32 +#define arch_efi_call_virt_setup() \ +({ \ + efi_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + efi_fpu_end(); \ +}) + +#else /* !CONFIG_X86_32 */ + +#define EFI_LOADER_SIGNATURE "EL64" + +extern asmlinkage u64 __efi_call(void *fp, ...); + +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ +}) + +#define arch_efi_call_virt_setup() \ +({ \ + efi_sync_low_kernel_mappings(); \ + efi_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ + efi_enter_mm(); \ +}) + +#undef arch_efi_call_virt +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + efi_leave_mm(); \ + firmware_restrict_branch_speculation_end(); \ + efi_fpu_end(); \ +}) + +#ifdef CONFIG_KASAN +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove +#endif + +#endif /* CONFIG_X86_32 */ + +extern int __init efi_memblock_x86_reserve_range(void); +extern void __init efi_print_memmap(void); +extern void __init efi_map_region(efi_memory_desc_t *md); +extern void __init efi_map_region_fixed(efi_memory_desc_t *md); +extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); +extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void __init efi_runtime_update_mappings(void); +extern void __init efi_dump_pagetable(void); +extern void __init efi_apply_memmap_quirks(void); +extern int __init efi_reuse_config(u64 tables, int nr_tables); +extern void efi_delete_dummy_variable(void); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); +extern void efi_free_boot_services(void); + +void efi_enter_mm(void); +void efi_leave_mm(void); + +/* kexec external ABI */ +struct efi_setup_data { + u64 fw_vendor; + u64 __unused; + u64 tables; + u64 smbios; + u64 reserved[8]; +}; + +extern u64 efi_setup; + +#ifdef CONFIG_EFI +extern efi_status_t __efi64_thunk(u32, ...); + +#define efi64_thunk(...) ({ \ + u64 __pad[3]; /* must have space for 3 args on the stack */ \ + __efi_nargs_check(efi64_thunk, 9, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__, __pad); \ +}) + +static inline bool efi_is_mixed(void) +{ + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); +} + +static inline bool efi_runtime_supported(void) +{ + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) + return true; + + return IS_ENABLED(CONFIG_EFI_MIXED); +} + +extern void parse_efi_setup(u64 phys_addr, u32 data_len); + +extern void efi_thunk_runtime_setup(void); +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys); + +/* arch specific definitions used by the stub code */ + +#ifdef CONFIG_EFI_MIXED + +#define ARCH_HAS_EFISTUB_WRAPPERS + +static inline bool efi_is_64bit(void) +{ + extern const bool efi_is64; + + return efi_is64; +} + +static inline bool efi_is_native(void) +{ + return efi_is_64bit(); +} + +#define efi_mixed_mode_cast(attr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(u32, __typeof__(attr)), \ + (unsigned long)(attr), (attr)) + +#define efi_table_attr(inst, attr) \ + (efi_is_native() \ + ? inst->attr \ + : (__typeof__(inst->attr)) \ + efi_mixed_mode_cast(inst->mixed_mode.attr)) + +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ + +static inline void *efi64_zero_upper(void *p) +{ + ((u32 *)p)[1] = 0; + return p; +} + +static inline u32 efi64_convert_status(efi_status_t status) +{ + return (u32)(status | (u64)status >> 32); +} + +#define __efi64_split(val) (val) & U32_MAX, (u64)(val) >> 32 + +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) + +#define __efi64_argmap_create_event(type, tpl, f, c, event) \ + ((type), (tpl), (f), (c), efi64_zero_upper(event)) + +#define __efi64_argmap_set_timer(event, type, time) \ + ((event), (type), lower_32_bits(time), upper_32_bits(time)) + +#define __efi64_argmap_wait_for_event(num, event, index) \ + ((num), (event), efi64_zero_upper(index)) + +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_device_path(protocol, path, handle) \ + ((protocol), (path), efi64_zero_upper(handle)) + +#define __efi64_argmap_exit(handle, status, size, data) \ + ((handle), efi64_convert_status(status), (size), (data)) + +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + +/* LoadFile */ +#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ + ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) + +/* Graphics Output Protocol */ +#define __efi64_argmap_query_mode(gop, mode, size, info) \ + ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info)) + +/* TCG2 protocol */ +#define __efi64_argmap_hash_log_extend_event(prot, fl, addr, size, ev) \ + ((prot), (fl), 0ULL, (u64)(addr), 0ULL, (u64)(size), 0ULL, ev) + +/* DXE services */ +#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \ + (__efi64_split(phys), (desc)) + +#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ + (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_, following the examples above. + */ + +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +/* The three macros below handle dispatching via the thunk if needed */ + +#define efi_call_proto(inst, func, ...) \ + (efi_is_native() \ + ? inst->func(inst, ##__VA_ARGS__) \ + : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__)) + +#define efi_bs_call(func, ...) \ + (efi_is_native() \ + ? efi_system_table->boottime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + boottime), \ + func, __VA_ARGS__)) + +#define efi_rt_call(func, ...) \ + (efi_is_native() \ + ? efi_system_table->runtime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + runtime), \ + func, __VA_ARGS__)) + +#define efi_dxe_call(func, ...) \ + (efi_is_native() \ + ? efi_dxe_table->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__)) + +#else /* CONFIG_EFI_MIXED */ + +static inline bool efi_is_64bit(void) +{ + return IS_ENABLED(CONFIG_X86_64); +} + +#endif /* CONFIG_EFI_MIXED */ + +extern bool efi_reboot_required(void); +extern bool efi_is_table_address(unsigned long phys_addr); + +extern void efi_reserve_boot_services(void); +#else +static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} +static inline bool efi_reboot_required(void) +{ + return false; +} +static inline bool efi_is_table_address(unsigned long phys_addr) +{ + return false; +} +static inline void efi_reserve_boot_services(void) +{ +} +#endif /* CONFIG_EFI */ + +#ifdef CONFIG_EFI_FAKE_MEMMAP +extern void __init efi_fake_memmap_early(void); +#else +static inline void efi_fake_memmap_early(void) +{ +} +#endif + +#define arch_ima_efi_boot_mode \ + ({ extern struct boot_params boot_params; boot_params.secure_boot; }) + +#endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h new file mode 100644 index 000000000..cb0ff1055 --- /dev/null +++ b/arch/x86/include/asm/elf.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ELF_H +#define _ASM_X86_ELF_H + +/* + * ELF register definitions.. + */ +#include + +#include +#include +#include +#include + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +#ifdef __i386__ + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#else + +/* x86-64 relocation types */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ +#define R_X86_64_PC64 24 /* Place relative 64-bit signed */ + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#endif + +#include + +#ifdef CONFIG_X86_64 +extern unsigned int vdso64_enabled; +#endif +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +extern unsigned int vdso32_enabled; +#endif + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch_ia32(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#include + +#ifdef CONFIG_X86_32 +#include + +#define elf_check_arch(x) elf_check_arch_ia32(x) + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + contains a pointer to a function which might be registered using `atexit'. + This provides a mean for the dynamic linker to call DT_FINI functions for + shared libraries that have been loaded before the code runs. + + A value of 0 tells we have no such handler. + + We might as well make sure everything else is cleared too (except for %esp), + just to make things more deterministic. + */ +#define ELF_PLAT_INIT(_r, load_addr) \ + do { \ + _r->bx = 0; _r->cx = 0; _r->dx = 0; \ + _r->si = 0; _r->di = 0; _r->bp = 0; \ + _r->ax = 0; \ +} while (0) + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different) + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ + pr_reg[7] = regs->ds; \ + pr_reg[8] = regs->es; \ + pr_reg[9] = regs->fs; \ + savesegment(gs, pr_reg[10]); \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ + pr_reg[13] = regs->cs; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ + pr_reg[16] = regs->ss; \ +} while (0); + +#define ELF_PLATFORM (utsname()->machine) +#define set_personality_64bit() do { } while (0) + +#else /* CONFIG_X86_32 */ + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) + +#if __USER32_DS != __USER_DS +# error "The following code assumes __USER32_DS == __USER_DS" +#endif + +static inline void elf_common_init(struct thread_struct *t, + struct pt_regs *regs, const u16 ds) +{ + /* ax gets execve's return value. */ + /*regs->ax = */ regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + t->fsbase = t->gsbase = 0; + t->fsindex = t->gsindex = 0; + t->ds = t->es = ds; +} + +#define ELF_PLAT_INIT(_r, load_addr) \ + elf_common_init(¤t->thread, _r, 0) + +#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ + elf_common_init(¤t->thread, regs, __USER_DS) + +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32); +#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \ + compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64) + +void set_personality_ia32(bool); +#define COMPAT_SET_PERSONALITY(ex) \ + set_personality_ia32((ex).e_machine == EM_X86_64) + +#define COMPAT_ELF_PLATFORM ("i686") + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different). Assumes current is the process + * getting dumped. + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + unsigned v; \ + (pr_reg)[0] = (regs)->r15; \ + (pr_reg)[1] = (regs)->r14; \ + (pr_reg)[2] = (regs)->r13; \ + (pr_reg)[3] = (regs)->r12; \ + (pr_reg)[4] = (regs)->bp; \ + (pr_reg)[5] = (regs)->bx; \ + (pr_reg)[6] = (regs)->r11; \ + (pr_reg)[7] = (regs)->r10; \ + (pr_reg)[8] = (regs)->r9; \ + (pr_reg)[9] = (regs)->r8; \ + (pr_reg)[10] = (regs)->ax; \ + (pr_reg)[11] = (regs)->cx; \ + (pr_reg)[12] = (regs)->dx; \ + (pr_reg)[13] = (regs)->si; \ + (pr_reg)[14] = (regs)->di; \ + (pr_reg)[15] = (regs)->orig_ax; \ + (pr_reg)[16] = (regs)->ip; \ + (pr_reg)[17] = (regs)->cs; \ + (pr_reg)[18] = (regs)->flags; \ + (pr_reg)[19] = (regs)->sp; \ + (pr_reg)[20] = (regs)->ss; \ + (pr_reg)[21] = x86_fsbase_read_cpu(); \ + (pr_reg)[22] = x86_gsbase_read_cpu_inactive(); \ + asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ + asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ + asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ + asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ +} while (0); + +/* I'm not sure if we can use '-' here */ +#define ELF_PLATFORM ("x86_64") +extern void set_personality_64bit(void); +extern unsigned int sysctl_vsyscall32; +extern int force_personality32; + +#endif /* !CONFIG_X86_32 */ + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE 4096 + +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is above 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ + (DEFAULT_MAP_WINDOW / 3 * 2)) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) + +extern u32 elf_hwcap2; + +/* + * HWCAP2 supplies mask with kernel enabled CPU features, so that + * the application can discover that it can safely use them. + * The bits are defined in uapi/asm/hwcap2.h. + */ +#define ELF_HWCAP2 (elf_hwcap2) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define SET_PERSONALITY(ex) set_personality_64bit() + +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + * CPU: | lacks NX* | has NX, ia32 | has NX, x86_64 | + * ELF: | | | | + * ---------------------|------------|------------------|----------------| + * missing PT_GNU_STACK | exec-all | exec-all | exec-none | + * PT_GNU_STACK == RWX | exec-stack | exec-stack | exec-stack | + * PT_GNU_STACK == RW | exec-none | exec-none | exec-none | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. + * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * + */ +#define elf_read_implies_exec(ex, executable_stack) \ + (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT) + +struct task_struct; + +#define ARCH_DLINFO_IA32 \ +do { \ + if (VDSO_CURRENT_BASE) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ + } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ +} while (0) + +/* + * True on X86_32 or when emulating IA32 on X86_64 + */ +static inline int mmap_is_ia32(void) +{ + return IS_ENABLED(CONFIG_X86_32) || + (IS_ENABLED(CONFIG_COMPAT) && + test_thread_flag(TIF_ADDR32)); +} + +extern unsigned long task_size_32bit(void); +extern unsigned long task_size_64bit(int full_addr_space); +extern unsigned long get_mmap_base(int is_legacy); +extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); + +#ifdef CONFIG_X86_32 + +#define __STACK_RND_MASK(is32bit) (0x7ff) +#define STACK_RND_MASK (0x7ff) + +#define ARCH_DLINFO ARCH_DLINFO_IA32 + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ + +#else /* CONFIG_X86_32 */ + +/* 1GB for 64bit, 8MB for 32bit */ +#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) +#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) + +#define ARCH_DLINFO \ +do { \ + if (vdso64_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ +} while (0) + +/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */ +#define ARCH_DLINFO_X32 \ +do { \ + if (vdso64_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ +} while (0) + +#define AT_SYSINFO 32 + +#define COMPAT_ARCH_DLINFO \ +if (exec->e_machine == EM_X86_64) \ + ARCH_DLINFO_X32; \ +else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ + ARCH_DLINFO_IA32 + +#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +#endif /* !CONFIG_X86_32 */ + +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) + +#define VDSO_ENTRY \ + ((unsigned long)current->mm->context.vdso + \ + vdso_image_32.sym___kernel_vsyscall) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); +extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp, bool x32); +#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + compat_arch_setup_additional_pages(bprm, interpreter, \ + (ex->e_machine == EM_X86_64)) + +extern bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs); + +/* Do not change the values. See get_align_mask() */ +enum align_flags { + ALIGN_VA_32 = BIT(0), + ALIGN_VA_64 = BIT(1), +}; + +struct va_alignment { + int flags; + unsigned long mask; + unsigned long bits; +} ____cacheline_aligned; + +extern struct va_alignment va_align; +extern unsigned long align_vdso_addr(unsigned long); +#endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/elfcore-compat.h b/arch/x86/include/asm/elfcore-compat.h new file mode 100644 index 000000000..f1b6c7a8d --- /dev/null +++ b/arch/x86/include/asm/elfcore-compat.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_ELFCORE_COMPAT_H +#define _ASM_X86_ELFCORE_COMPAT_H + +#include + +/* + * On amd64 we have two 32bit ABIs - i386 and x32. The latter + * has bigger registers, so we use it for compat_elf_regset_t. + * The former uses i386_elf_prstatus and PRSTATUS_SIZE/SET_PR_FPVALID + * are used to choose the size and location of ->pr_fpvalid of + * the layout actually used. + */ +typedef struct user_regs_struct compat_elf_gregset_t; + +struct i386_elf_prstatus +{ + struct compat_elf_prstatus_common common; + struct user_regs_struct32 pr_reg; + compat_int_t pr_fpvalid; +}; + +#define PRSTATUS_SIZE \ + (user_64bit_mode(task_pt_regs(current)) \ + ? sizeof(struct compat_elf_prstatus) \ + : sizeof(struct i386_elf_prstatus)) +#define SET_PR_FPVALID(S) \ + (*(user_64bit_mode(task_pt_regs(current)) \ + ? &(S)->pr_fpvalid \ + : &((struct i386_elf_prstatus *)(S))->pr_fpvalid) = 1) + +#endif diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h new file mode 100644 index 000000000..2abde717d --- /dev/null +++ b/arch/x86/include/asm/emergency-restart.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMERGENCY_RESTART_H +#define _ASM_X86_EMERGENCY_RESTART_H + +extern void machine_emergency_restart(void); + +#endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 000000000..70f5b98a5 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h new file mode 100644 index 000000000..b1314e41a --- /dev/null +++ b/arch/x86/include/asm/enclu.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ENCLU_H +#define _ASM_X86_ENCLU_H + +#define EENTER 0x02 +#define ERESUME 0x03 +#define EEXIT 0x04 + +#endif /* _ASM_X86_ENCLU_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h new file mode 100644 index 000000000..11203a9fe --- /dev/null +++ b/arch/x86/include/asm/entry-common.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_ENTRY_COMMON_H +#define _ASM_X86_ENTRY_COMMON_H + +#include +#include + +#include +#include +#include + +/* Check that the stack and regs on entry from user mode are sane. */ +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + /* + * Make sure that the entry code gave us a sensible EFLAGS + * register. Native because we want to check the actual CPU + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (boot_cpu_has(X86_FEATURE_SMAP) || + (IS_ENABLED(CONFIG_64BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); + + /* + * All entries from user mode (except #DF) should be on the + * normal thread stack and should have user pt_regs in the + * correct location. + */ + WARN_ON_ONCE(!on_thread_stack()); + WARN_ON_ONCE(regs != task_pt_regs(current)); + } +} +#define arch_enter_from_user_mode arch_enter_from_user_mode + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); + + if (unlikely(ti_work & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + + fpregs_assert_state_consistent(); + if (unlikely(ti_work & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + * + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer + * special case only applies after poking regs and before the + * very next return to user mode. + */ + current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED); +#endif + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * but not enough for x86 stack utilization comfort. To keep + * reasonable stack head room, reduce the maximum offset to 8 bits. + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints (see cc_stack_align4/8 in + * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) + * low bits from any entropy chosen here. + * + * Therefore, final stack offset entropy will be 5 (x86_64) or + * 6 (ia32) bits. + */ + choose_random_kstack_offset(rdtsc() & 0xFF); +} +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare + +static __always_inline void arch_exit_to_user_mode(void) +{ + mds_user_clear_cpu_buffers(); + amd_clear_divider(); +} +#define arch_exit_to_user_mode arch_exit_to_user_mode + +#endif diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h new file mode 100644 index 000000000..6777480d8 --- /dev/null +++ b/arch/x86/include/asm/espfix.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ESPFIX_H +#define _ASM_X86_ESPFIX_H + +#ifdef CONFIG_X86_ESPFIX64 + +#include + +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +extern void init_espfix_bsp(void); +extern void init_espfix_ap(int cpu); +#else +static inline void init_espfix_ap(int cpu) { } +#endif + +#endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h new file mode 100644 index 000000000..54c2e1db2 --- /dev/null +++ b/arch/x86/include/asm/exec.h @@ -0,0 +1 @@ +/* define arch_align_stack() here */ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h new file mode 100644 index 000000000..eeed395c3 --- /dev/null +++ b/arch/x86/include/asm/extable.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_H +#define _ASM_X86_EXTABLE_H + +#include + +/* + * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + int insn, fixup, data; +}; +struct pt_regs; + +#define ARCH_HAS_RELATIVE_EXTABLE + +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ + } while (0) + +extern int fixup_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr); +extern int fixup_bug(struct pt_regs *regs, int trapnr); +extern int ex_get_fixup_type(unsigned long ip); +extern void early_fixup_exception(struct pt_regs *regs, int trapnr); + +#ifdef CONFIG_X86_MCE +extern void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +{ + for (;;) + cpu_relax(); +} +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + +#endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 index 000000000..991e31cfd --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +/* + * Our IMM is signed, as such it must live at the top end of the word. Also, + * since C99 hex constants are of ambigious type, force cast the mask to 'int' + * so that FIELD_GET() will DTRT and sign extend the value when it extracts it. + */ +#define EX_DATA_TYPE_MASK ((int)0x000000FF) +#define EX_DATA_REG_MASK ((int)0x00000F00) +#define EX_DATA_FLAG_MASK ((int)0x0000F000) +#define EX_DATA_IMM_MASK ((int)0xFFFF0000) + +#define EX_DATA_REG_SHIFT 8 +#define EX_DATA_FLAG_SHIFT 12 +#define EX_DATA_IMM_SHIFT 16 + +#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT) +#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) +#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) + +/* segment regs */ +#define EX_REG_DS EX_DATA_REG(8) +#define EX_REG_ES EX_DATA_REG(9) +#define EX_REG_FS EX_DATA_REG(10) +#define EX_REG_GS EX_DATA_REG(11) + +/* flags */ +#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) +#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) +#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) + +/* types */ +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +#define EX_TYPE_COPY 4 +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_BPF 7 +#define EX_TYPE_WRMSR 8 +#define EX_TYPE_RDMSR 9 +#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */ +#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ +#define EX_TYPE_WRMSR_IN_MCE 12 +#define EX_TYPE_RDMSR_IN_MCE 13 +#define EX_TYPE_DEFAULT_MCE_SAFE 14 +#define EX_TYPE_FAULT_MCE_SAFE 15 + +#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ +#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) + +#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ +#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) +#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) +#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1)) + +#define EX_TYPE_FAULT_SGX 18 + +#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */ +#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1)) +#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) +#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) + +#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */ + +#endif diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h new file mode 100644 index 000000000..ab4c96014 --- /dev/null +++ b/arch/x86/include/asm/fb.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FB_H +#define _ASM_X86_FB_H + +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + unsigned long prot; + + prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK; + if (boot_cpu_data.x86 > 3) + pgprot_val(vma->vm_page_prot) = + prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS); +} + +extern int fb_is_primary_device(struct fb_info *info); + +#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h new file mode 100644 index 000000000..d0dcefb5c --- /dev/null +++ b/arch/x86/include/asm/fixmap.h @@ -0,0 +1,200 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 + */ + +#ifndef _ASM_X86_FIXMAP_H +#define _ASM_X86_FIXMAP_H + +#include + +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. + */ +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define FIXMAP_PMD_NUM 2 +#else +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) +# define FIXMAP_PMD_NUM (KM_PMDS + 2) +#endif +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#ifdef CONFIG_X86_32 +#include +#else +#include +#endif + +/* + * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall + * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. + * Because of this, FIXADDR_TOP x86 integration was left as later work. + */ +#ifdef CONFIG_X86_32 +/* + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) +#else +#define FIXADDR_TOP (round_up(VSYSCALL_ADDR + PAGE_SIZE, 1<> PAGE_SHIFT, +#endif +#endif + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +#endif +#ifdef CONFIG_KMAP_LOCAL + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif +#endif +#ifdef CONFIG_PARAVIRT_XXL + FIX_PARAVIRT_BOOTMAP, +#endif + +#ifdef CONFIG_ACPI_APEI_GHES + /* Used for GHES mapping from assorted contexts */ + FIX_APEI_GHES_IRQ, + FIX_APEI_GHES_NMI, +#endif + + __end_of_permanent_fixed_addresses, + + /* + * 512 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * If necessary we round it up to the next 512 pages boundary so + * that we can have a single pmd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_SLOTS 8 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ + (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) & + -PTRS_PER_PTE + ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - + (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1)) + : __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, +#ifdef CONFIG_X86_32 + FIX_WP_TEST, +#endif +#ifdef CONFIG_INTEL_TXT + FIX_TBOOT_BASE, +#endif + __end_of_fixed_addresses +}; + + +extern void reserve_top_address(unsigned long reserve); + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) + +extern int fixmaps_set; + +extern pte_t *pkmap_page_table; + +void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags); + +#ifndef CONFIG_PARAVIRT_XXL +static inline void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + native_set_fixmap(idx, phys, flags); +} +#endif + +/* + * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not + * supported for MMIO addresses, so make sure that the memory encryption + * mask is not part of the page attributes. + */ +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE + +/* + * Early memremap routines used for in-place encryption. The mappings created + * by these routines are intended to be used as temporary mappings. + */ +void __init *early_memremap_encrypted(resource_size_t phys_addr, + unsigned long size); +void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, + unsigned long size); +void __init *early_memremap_decrypted(resource_size_t phys_addr, + unsigned long size); +void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, + unsigned long size); + +#include + +#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) +#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0)) + +void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h new file mode 100644 index 000000000..6ec3fc969 --- /dev/null +++ b/arch/x86/include/asm/floppy.h @@ -0,0 +1,281 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef _ASM_X86_FLOPPY_H +#define _ASM_X86_FLOPPY_H + +#include + +/* + * The DMA channel used by the floppy controller cannot access data at + * addresses >= 16MB + * + * Went back to the 1MB limit, as some people had problems with the floppy + * driver otherwise. It doesn't matter much for performance anyway, as most + * floppy accesses go through the track buffer. + */ +#define _CROSS_64KB(a, s, vdma) \ + (!(vdma) && \ + ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) + +#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1) + + +#define SW fd_routine[use_virtual_dma & 1] +#define CSW fd_routine[can_use_virtual_dma & 1] + + +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) + +#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") +#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) +#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) +#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) +#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) + +#define FLOPPY_CAN_FALLBACK_ON_NODMA + +static int virtual_dma_count; +static int virtual_dma_residue; +static char *virtual_dma_addr; +static int virtual_dma_mode; +static int doing_pdma; + +static irqreturn_t floppy_hardint(int irq, void *dev_id) +{ + unsigned char st; + +#undef TRACE_FLPY_INT + +#ifdef TRACE_FLPY_INT + static int calls; + static int bytes; + static int dma_wait; +#endif + if (!doing_pdma) + return floppy_interrupt(irq, dev_id); + +#ifdef TRACE_FLPY_INT + if (!calls) + bytes = virtual_dma_count; +#endif + + { + int lcount; + char *lptr; + + for (lcount = virtual_dma_count, lptr = virtual_dma_addr; + lcount; lcount--, lptr++) { + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) + break; + if (virtual_dma_mode) + outb_p(*lptr, virtual_dma_port + FD_DATA); + else + *lptr = inb_p(virtual_dma_port + FD_DATA); + } + virtual_dma_count = lcount; + virtual_dma_addr = lptr; + st = inb(virtual_dma_port + FD_STATUS); + } + +#ifdef TRACE_FLPY_INT + calls++; +#endif + if (st == STATUS_DMA) + return IRQ_HANDLED; + if (!(st & STATUS_DMA)) { + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +#ifdef TRACE_FLPY_INT + printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + virtual_dma_count, virtual_dma_residue, calls, bytes, + dma_wait); + calls = 0; + dma_wait = 0; +#endif + doing_pdma = 0; + floppy_interrupt(irq, dev_id); + return IRQ_HANDLED; + } +#ifdef TRACE_FLPY_INT + if (!virtual_dma_count) + dma_wait++; +#endif + return IRQ_HANDLED; +} + +static void fd_disable_dma(void) +{ + if (!(can_use_virtual_dma & 1)) + disable_dma(FLOPPY_DMA); + doing_pdma = 0; + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +} + +static int vdma_request_dma(unsigned int dmanr, const char *device_id) +{ + return 0; +} + +static void vdma_nop(unsigned int dummy) +{ +} + + +static int vdma_get_dma_residue(unsigned int dummy) +{ + return virtual_dma_count + virtual_dma_residue; +} + + +static int fd_request_irq(void) +{ + if (can_use_virtual_dma) + return request_irq(FLOPPY_IRQ, floppy_hardint, + 0, "floppy", NULL); + else + return request_irq(FLOPPY_IRQ, floppy_interrupt, + 0, "floppy", NULL); +} + +static unsigned long dma_mem_alloc(unsigned long size) +{ + return __get_dma_pages(GFP_KERNEL|__GFP_NORETRY, get_order(size)); +} + + +static unsigned long vdma_mem_alloc(unsigned long size) +{ + return (unsigned long)vmalloc(size); + +} + +#define nodma_mem_alloc(size) vdma_mem_alloc(size) + +static void _fd_dma_mem_free(unsigned long addr, unsigned long size) +{ + if ((unsigned long)addr >= (unsigned long)high_memory) + vfree((void *)addr); + else + free_pages(addr, get_order(size)); +} + +#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) + +static void _fd_chose_dma_mode(char *addr, unsigned long size) +{ + if (can_use_virtual_dma == 2) { + if ((unsigned long)addr >= (unsigned long)high_memory || + isa_virt_to_bus(addr) >= 0x1000000 || + _CROSS_64KB(addr, size, 0)) + use_virtual_dma = 1; + else + use_virtual_dma = 0; + } else { + use_virtual_dma = can_use_virtual_dma & 1; + } +} + +#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) + + +static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + doing_pdma = 1; + virtual_dma_port = io; + virtual_dma_mode = (mode == DMA_MODE_WRITE); + virtual_dma_addr = addr; + virtual_dma_count = size; + virtual_dma_residue = 0; + return 0; +} + +static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) +{ +#ifdef FLOPPY_SANITY_CHECK + if (CROSS_64KB(addr, size)) { + printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); + return -1; + } +#endif + /* actual, physical DMA */ + doing_pdma = 0; + clear_dma_ff(FLOPPY_DMA); + set_dma_mode(FLOPPY_DMA, mode); + set_dma_addr(FLOPPY_DMA, isa_virt_to_bus(addr)); + set_dma_count(FLOPPY_DMA, size); + enable_dma(FLOPPY_DMA); + return 0; +} + +static struct fd_routine_l { + int (*_request_dma)(unsigned int dmanr, const char *device_id); + void (*_free_dma)(unsigned int dmanr); + int (*_get_dma_residue)(unsigned int dummy); + unsigned long (*_dma_mem_alloc)(unsigned long size); + int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); +} fd_routine[] = { + { + ._request_dma = request_dma, + ._free_dma = free_dma, + ._get_dma_residue = get_dma_residue, + ._dma_mem_alloc = dma_mem_alloc, + ._dma_setup = hard_dma_setup + }, + { + ._request_dma = vdma_request_dma, + ._free_dma = vdma_nop, + ._get_dma_residue = vdma_get_dma_residue, + ._dma_mem_alloc = vdma_mem_alloc, + ._dma_setup = vdma_dma_setup + } +}; + + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock + * is needed to prevent corrupted CMOS RAM in case "insmod floppy" + * coincides with another rtc CMOS user. Paul G. + */ +#define FLOPPY0_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = (CMOS_READ(0x10) >> 4) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define FLOPPY1_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = CMOS_READ(0x10) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define N_FDC 2 +#define N_DRIVE 8 + +#define EXTRA_FLOPPY_PARAMS + +#endif /* _ASM_X86_FLOPPY_H */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h new file mode 100644 index 000000000..e829fa4c6 --- /dev/null +++ b/arch/x86/include/asm/fpu/api.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_API_H +#define _ASM_X86_FPU_API_H +#include + +#include + +/* + * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It + * disables preemption so be careful if you intend to use it for long periods + * of time. + * If you intend to use the FPU in irq/softirq you need to check first with + * irq_fpu_usable() if it is possible. + */ + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); +extern void kernel_fpu_end(void); +extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); + +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ +#ifdef CONFIG_X86_64 + /* + * Any 64-bit code that uses 387 instructions must explicitly request + * KFPU_387. + */ + kernel_fpu_begin_mask(KFPU_MXCSR); +#else + /* + * 32-bit kernel code may use 387 operations as well as SSE2, etc, + * as long as it checks that the CPU has the required capability. + */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +#endif +} + +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate. + * A context switch will (and softirq might) save CPU's FPU registers to + * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * a random state. + * + * local_bh_disable() protects against both preemption and soft interrupts + * on !RT kernels. + * + * On RT kernels local_bh_disable() is not sufficient because it only + * serializes soft interrupt related sections via a local lock, but stays + * preemptible. Disabling preemption is the right choice here as bottom + * half processing is always in thread context on RT kernels so it + * implicitly prevents bottom half processing as well. + * + * Disabling preemption also serializes against kernel_fpu_begin(). + */ +static inline void fpregs_lock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); + +/* + * Query the presence of one or more xfeatures. Works on any legacy CPU as well. + * + * If 'feature_name' is set then put a human-readable description of + * the feature there as well - this can be used to print error (or success) + * messages. + */ +extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); + +/* Trap handling */ +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); +extern void fpu_reset_from_exception_fixup(void); + +/* Boot, hotplug and resume */ +extern void fpu__init_cpu(void); +extern void fpu__init_system(void); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif + +/* State tracking */ +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* Process cleanup */ +#ifdef CONFIG_X86_64 +extern void fpstate_free(struct fpu *fpu); +#else +static inline void fpstate_free(struct fpu *fpu) { } +#endif + +/* fpstate-related functions which are exported to KVM */ +extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); + +extern u64 xstate_get_guest_group_perm(void); + +/* KVM specific functions */ +extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); +extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); +extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); +extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); + +#ifdef CONFIG_X86_64 +extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +extern void fpu_sync_guest_vmexit_xfd_state(void); +#else +static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +static inline void fpu_sync_guest_vmexit_xfd_state(void) { } +#endif + +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); +extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + +static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +{ + gfpu->fpstate->is_confidential = true; +} + +static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) +{ + return gfpu->fpstate->is_confidential; +} + +/* prctl */ +extern long fpu_xstate_prctl(int option, unsigned long arg2); + +extern void fpu_idle_fpregs(void); + +#endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h new file mode 100644 index 000000000..4f928d6a3 --- /dev/null +++ b/arch/x86/include/asm/fpu/regset.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * FPU regset handling methods: + */ +#ifndef _ASM_X86_FPU_REGSET_H +#define _ASM_X86_FPU_REGSET_H + +#include + +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; +extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == regset_fpregs_active. Please refer to the comment + * at the definition of regset_fpregs_active. + */ +#define xstateregs_active regset_fpregs_active + +#endif /* _ASM_X86_FPU_REGSET_H */ diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h new file mode 100644 index 000000000..c2d6cd78e --- /dev/null +++ b/arch/x86/include/asm/fpu/sched.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_SCHED_H +#define _ASM_X86_FPU_SCHED_H + +#include + +#include +#include + +#include + +extern void save_fpregs_to_fpstate(struct fpu *fpu); +extern void fpu__drop(struct fpu *fpu); +extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal); +extern void fpu_flush_thread(void); + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state. + * This is done within the context of the old process. + * + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. + * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * + * The FPU context is only stored/restored for a user task and + * PF_KTHREAD is used to distinguish between kernel and user threads. + */ +static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU) && + !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; + + trace_x86_fpu_regs_deactivated(old_fpu); + } +} + +/* + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. + */ +static inline void switch_fpu_finish(void) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); +} + +#endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h new file mode 100644 index 000000000..e1c9df910 --- /dev/null +++ b/arch/x86/include/asm/fpu/signal.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 FPU signal frame handling methods: + */ +#ifndef _ASM_X86_FPU_SIGNAL_H +#define _ASM_X86_FPU_SIGNAL_H + +#include +#include + +#include + +#ifdef CONFIG_X86_64 +# include +# include +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct fxregs_state *fxsave, + const struct user_i387_ia32_struct *env); + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size); + +unsigned long fpu__get_fpstate_size(void); + +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); +extern void fpu__clear_user_states(struct fpu *fpu); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); + +extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask); +#endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h new file mode 100644 index 000000000..eb7cd1139 --- /dev/null +++ b/arch/x86/include/asm/fpu/types.h @@ -0,0 +1,585 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * FPU data structures: + */ +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +/* + * The legacy x87 FPU state format, as saved by FSAVE and + * restored by the FRSTOR instructions: + */ +struct fregs_state { + u32 cwd; /* FPU Control Word */ + u32 swd; /* FPU Status Word */ + u32 twd; /* FPU Tag Word */ + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Pointer Offset */ + u32 fos; /* FPU Operand Pointer Selector */ + + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + + /* Software status information [not touched by FSAVE]: */ + u32 status; +}; + +/* + * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and + * restored by the FXRSTOR instructions. It's similar to the FSAVE + * format, but differs in some areas, plus has extensions at + * the end for the XMM registers. + */ +struct fxregs_state { + u16 cwd; /* Control Word */ + u16 swd; /* Status Word */ + u16 twd; /* Tag Word */ + u16 fop; /* Last Instruction Opcode */ + union { + struct { + u64 rip; /* Instruction Pointer */ + u64 rdp; /* Data Pointer */ + }; + struct { + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Offset */ + u32 fos; /* FPU Operand Selector */ + }; + }; + u32 mxcsr; /* MXCSR Register State */ + u32 mxcsr_mask; /* MXCSR Mask */ + + /* 8*16 bytes for each FP-reg = 128 bytes: */ + u32 st_space[32]; + + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; + +} __attribute__((aligned(16))); + +/* Default value for fxregs_state.mxcsr: */ +#define MXCSR_DEFAULT 0x1f80 + +/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ +#define MXCSR_AND_FLAGS_SIZE sizeof(u64) + +/* + * Software based FPU emulation state. This is arbitrary really, + * it matches the x87 format to make it easier to understand: + */ +struct swregs_state { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + u8 ftop; + u8 changed; + u8 lookahead; + u8 no_update; + u8 rm; + u8 alimit; + struct math_emu_info *info; + u32 entry_eip; +}; + +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature { + XFEATURE_FP, + XFEATURE_SSE, + /* + * Values above here are "legacy states". + * Those below are "extended states". + */ + XFEATURE_YMM, + XFEATURE_BNDREGS, + XFEATURE_BNDCSR, + XFEATURE_OPMASK, + XFEATURE_ZMM_Hi256, + XFEATURE_Hi16_ZMM, + XFEATURE_PT_UNIMPLEMENTED_SO_FAR, + XFEATURE_PKRU, + XFEATURE_PASID, + XFEATURE_RSRVD_COMP_11, + XFEATURE_RSRVD_COMP_12, + XFEATURE_RSRVD_COMP_13, + XFEATURE_RSRVD_COMP_14, + XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILE_CFG, + XFEATURE_XTILE_DATA, + + XFEATURE_MAX, +}; + +#define XFEATURE_MASK_FP (1 << XFEATURE_FP) +#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) +#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) +#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) +#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) +#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) +#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) +#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) +#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) +#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) +#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) +#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) +#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) +#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) + +#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) +#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ + | XFEATURE_MASK_ZMM_Hi256 \ + | XFEATURE_MASK_Hi16_ZMM) + +#ifdef CONFIG_X86_64 +# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \ + | XFEATURE_MASK_XTILE_CFG) +#else +# define XFEATURE_MASK_XTILE (0) +#endif + +#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM + +struct reg_128_bit { + u8 regbytes[128/8]; +}; +struct reg_256_bit { + u8 regbytes[256/8]; +}; +struct reg_512_bit { + u8 regbytes[512/8]; +}; +struct reg_1024_byte { + u8 regbytes[1024]; +}; + +/* + * State component 2: + * + * There are 16x 256-bit AVX registers named YMM0-YMM15. + * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) + * and are stored in 'struct fxregs_state::xmm_space[]' in the + * "legacy" area. + * + * The high 128 bits are stored here. + */ +struct ymmh_struct { + struct reg_128_bit hi_ymm[16]; +} __packed; + +/* Intel MPX support: */ + +struct mpx_bndreg { + u64 lower_bound; + u64 upper_bound; +} __packed; +/* + * State component 3 is used for the 4 128-bit bounds registers + */ +struct mpx_bndreg_state { + struct mpx_bndreg bndreg[4]; +} __packed; + +/* + * State component 4 is used for the 64-bit user-mode MPX + * configuration register BNDCFGU and the 64-bit MPX status + * register BNDSTATUS. We call the pair "BNDCSR". + */ +struct mpx_bndcsr { + u64 bndcfgu; + u64 bndstatus; +} __packed; + +/* + * The BNDCSR state is padded out to be 64-bytes in size. + */ +struct mpx_bndcsr_state { + union { + struct mpx_bndcsr bndcsr; + u8 pad_to_64_bytes[64]; + }; +} __packed; + +/* AVX-512 Components: */ + +/* + * State component 5 is used for the 8 64-bit opmask registers + * k0-k7 (opmask state). + */ +struct avx_512_opmask_state { + u64 opmask_reg[8]; +} __packed; + +/* + * State component 6 is used for the upper 256 bits of the + * registers ZMM0-ZMM15. These 16 256-bit values are denoted + * ZMM0_H-ZMM15_H (ZMM_Hi256 state). + */ +struct avx_512_zmm_uppers_state { + struct reg_256_bit zmm_upper[16]; +} __packed; + +/* + * State component 7 is used for the 16 512-bit registers + * ZMM16-ZMM31 (Hi16_ZMM state). + */ +struct avx_512_hi16_state { + struct reg_512_bit hi16_zmm[16]; +} __packed; + +/* + * State component 9: 32-bit PKRU register. The state is + * 8 bytes long but only 4 bytes is used currently. + */ +struct pkru_state { + u32 pkru; + u32 pad; +} __packed; + +/* + * State component 15: Architectural LBR configuration state. + * The size of Arch LBR state depends on the number of LBRs (lbr_depth). + */ + +struct lbr_entry { + u64 from; + u64 to; + u64 info; +}; + +struct arch_lbr_state { + u64 lbr_ctl; + u64 lbr_depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; + struct lbr_entry entries[]; +}; + +/* + * State component 17: 64-byte tile configuration register. + */ +struct xtile_cfg { + u64 tcfg[8]; +} __packed; + +/* + * State component 18: 1KB tile data register. + * Each register represents 16 64-byte rows of the matrix + * data. But the number of registers depends on the actual + * implementation. + */ +struct xtile_data { + struct reg_1024_byte tmm; +} __packed; + +/* + * State component 10 is supervisor state used for context-switching the + * PASID state. + */ +struct ia32_pasid_state { + u64 pasid; +} __packed; + +struct xstate_header { + u64 xfeatures; + u64 xcomp_bv; + u64 reserved[6]; +} __attribute__((packed)); + +/* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + +/* + * This is our most modern FPU state format, as saved by the XSAVE + * and restored by the XRSTOR instructions. + * + * It consists of a legacy fxregs portion, an xstate header and + * subsequent areas as defined by the xstate header. Not all CPUs + * support all the extensions, so the size of the extended area + * can vary quite a bit between CPUs. + */ +struct xregs_state { + struct fxregs_state i387; + struct xstate_header header; + u8 extended_state_area[0]; +} __attribute__ ((packed, aligned (64))); + +/* + * This is a union of all the possible FPU state formats + * put together, so that we can pick the right one runtime. + * + * The size of the structure is determined by the largest + * member - which is the xsave area. The padding is there + * to ensure that statically-allocated task_structs (just + * the init_task today) have enough space. + */ +union fpregs_state { + struct fregs_state fsave; + struct fxregs_state fxsave; + struct swregs_state soft; + struct xregs_state xsave; + u8 __padding[PAGE_SIZE]; +}; + +struct fpstate { + /* @kernel_size: The size of the kernel register image */ + unsigned int size; + + /* @user_size: The size in non-compacted UABI format */ + unsigned int user_size; + + /* @xfeatures: xfeatures for which the storage is sized */ + u64 xfeatures; + + /* @user_xfeatures: xfeatures valid in UABI buffers */ + u64 user_xfeatures; + + /* @xfd: xfeatures disabled to trap userspace use. */ + u64 xfd; + + /* @is_valloc: Indicator for dynamically allocated state */ + unsigned int is_valloc : 1; + + /* @is_guest: Indicator for guest state (KVM) */ + unsigned int is_guest : 1; + + /* + * @is_confidential: Indicator for KVM confidential mode. + * The FPU registers are restored by the + * vmentry firmware from encrypted guest + * memory. On vmexit the FPU registers are + * saved by firmware to encrypted guest memory + * and the registers are scrubbed before + * returning to the host. So there is no + * content which is worth saving and restoring. + * The fpstate has to be there so that + * preemption and softirq FPU usage works + * without special casing. + */ + unsigned int is_confidential : 1; + + /* @in_use: State is in use */ + unsigned int in_use : 1; + + /* @regs: The register state union for all supported formats */ + union fpregs_state regs; + + /* @regs is dynamically sized! Don't add anything after @regs! */ +} __aligned(64); + +#define FPU_GUEST_PERM_LOCKED BIT_ULL(63) + +struct fpu_state_perm { + /* + * @__state_perm: + * + * This bitmap indicates the permission for state components, which + * are available to a thread group. The permission prctl() sets the + * enabled state bits in thread_group_leader()->thread.fpu. + * + * All run time operations use the per thread information in the + * currently active fpu.fpstate which contains the xfeature masks + * and sizes for kernel and user space. + * + * This master permission field is only to be used when + * task.fpu.fpstate based checks fail to validate whether the task + * is allowed to expand it's xfeatures set which requires to + * allocate a larger sized fpstate buffer. + * + * Do not access this field directly. Use the provided helper + * function. Unlocked access is possible for quick checks. + */ + u64 __state_perm; + + /* + * @__state_size: + * + * The size required for @__state_perm. Only valid to access + * with sighand locked. + */ + unsigned int __state_size; + + /* + * @__user_state_size: + * + * The size required for @__state_perm user part. Only valid to + * access with sighand locked. + */ + unsigned int __user_state_size; +}; + +/* + * Highest level per task FPU state data structure that + * contains the FPU register state plus various FPU + * state fields: + */ +struct fpu { + /* + * @last_cpu: + * + * Records the last CPU on which this context was loaded into + * FPU registers. (In the lazy-restore case we might be + * able to reuse FPU registers across multiple context switches + * this way, if no intermediate task used the FPU.) + * + * A value of -1 is used to indicate that the FPU state in context + * memory is newer than the FPU state in registers, and that the + * FPU state should be reloaded next time the task is run. + */ + unsigned int last_cpu; + + /* + * @avx512_timestamp: + * + * Records the timestamp of AVX512 use during last context switch. + */ + unsigned long avx512_timestamp; + + /* + * @fpstate: + * + * Pointer to the active struct fpstate. Initialized to + * point at @__fpstate below. + */ + struct fpstate *fpstate; + + /* + * @__task_fpstate: + * + * Pointer to an inactive struct fpstate. Initialized to NULL. Is + * used only for KVM support to swap out the regular task fpstate. + */ + struct fpstate *__task_fpstate; + + /* + * @perm: + * + * Permission related information + */ + struct fpu_state_perm perm; + + /* + * @guest_perm: + * + * Permission related information for guest pseudo FPUs + */ + struct fpu_state_perm guest_perm; + + /* + * @__fpstate: + * + * Initial in-memory storage for FPU registers which are saved in + * context switch and when the kernel uses the FPU. The registers + * are restored from this storage on return to user space if they + * are not longer containing the tasks FPU register state. + */ + struct fpstate __fpstate; + /* + * WARNING: '__fpstate' is dynamically-sized. Do not put + * anything after it here. + */ +}; + +/* + * Guest pseudo FPU container + */ +struct fpu_guest { + /* + * @xfeatures: xfeature bitmap of features which are + * currently enabled for the guest vCPU. + */ + u64 xfeatures; + + /* + * @perm: xfeature bitmap of features which are + * permitted to be enabled for the guest + * vCPU. + */ + u64 perm; + + /* + * @xfd_err: Save the guest value. + */ + u64 xfd_err; + + /* + * @uabi_size: Size required for save/restore + */ + unsigned int uabi_size; + + /* + * @fpstate: Pointer to the allocated guest fpstate + */ + struct fpstate *fpstate; +}; + +/* + * FPU state configuration data. Initialized at boot time. Read only after init. + */ +struct fpu_state_config { + /* + * @max_size: + * + * The maximum size of the register state buffer. Includes all + * supported features except independent managed features. + */ + unsigned int max_size; + + /* + * @default_size: + * + * The default size of the register state buffer. Includes all + * supported features except independent managed features and + * features which have to be requested by user space before usage. + */ + unsigned int default_size; + + /* + * @max_features: + * + * The maximum supported features bitmap. Does not include + * independent managed features. + */ + u64 max_features; + + /* + * @default_features: + * + * The default supported features bitmap. Does not include + * independent managed features and features which have to + * be requested by user space before usage. + */ + u64 default_features; + /* + * @legacy_features: + * + * Features which can be reported back to user space + * even without XSAVE support, i.e. legacy features FP + SSE + */ + u64 legacy_features; +}; + +/* FPU state configuration information */ +extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; + +#endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h new file mode 100644 index 000000000..9a710c060 --- /dev/null +++ b/arch/x86/include/asm/fpu/xcr.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_XCR_H +#define _ASM_X86_FPU_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_IN_USE_MASK 0x00000001 + +static __always_inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +/* + * Return a mask of xfeatures which are currently being tracked + * by the processor as being in the initial configuration. + * + * Callers should check X86_FEATURE_XGETBV1. + */ +static __always_inline u64 xfeatures_in_use(void) +{ + return xgetbv(XCR_XFEATURE_IN_USE_MASK); +} + +#endif /* _ASM_X86_FPU_XCR_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h new file mode 100644 index 000000000..cd3dd170e --- /dev/null +++ b/arch/x86/include/asm/fpu/xstate.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include +#include + +#include +#include +#include + +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) + +#define XSTATE_CPUID 0x0000000d + +#define TILE_CPUID 0x0000001d + +#define FXSAVE_SIZE 512 + +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) + +#define XSAVE_ALIGNMENT 64 + +/* All currently supported user features */ +#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_XTILE) + +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + +/* Features which are dynamically enabled for a process on request */ +#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA + +/* All currently supported supervisor features */ +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) + +/* + * A supervisor state component may not always contain valuable information, + * and its size may be huge. Saving/restoring such supervisor state components + * at each context switch can cause high CPU and space overhead, which should + * be avoided. Such supervisor state components should only be saved/restored + * on demand. The on-demand supervisor features are set in this mask. + * + * Unlike the existing supported supervisor features, an independent supervisor + * feature does not allocate a buffer in task->fpu, and the corresponding + * supervisor state component cannot be saved/restored at each context switch. + * + * To support an independent supervisor feature, a developer should follow the + * dos and don'ts as below: + * - Do dynamically allocate a buffer for the supervisor state component. + * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the + * state component to/from the buffer. + * - Don't set the bit corresponding to the independent supervisor feature in + * IA32_XSS at run time, since it has been set at boot time. + */ +#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) + +/* + * Unsupported supervisor features. When a supervisor feature in this mask is + * supported in the future, move it to the supported supervisor feature mask. + */ +#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT) + +/* All supervisor states including supported and unsupported states. */ +#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ + XFEATURE_MASK_INDEPENDENT | \ + XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) + +/* + * The feature mask required to restore FPU state: + * - All user states which are not eagerly switched in switch_to()/exec() + * - The suporvisor states + */ +#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ + XFEATURE_MASK_SUPERVISOR_SUPPORTED) + +/* + * Features in this mask have space allocated in the signal frame, but may not + * have that space initialized when the feature is in its init state. + */ +#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_USER_DYNAMIC) + +extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; + +extern void __init update_regset_xstate_info(unsigned int size, + u64 xstate_mask); + +int xfeature_size(int xfeature_nr); + +void xsaves(struct xregs_state *xsave, u64 mask); +void xrstors(struct xregs_state *xsave, u64 mask); + +int xfd_enable_feature(u64 xfd_err); + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +#endif + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); + +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return static_branch_unlikely(&__fpu_state_size_dynamic); +} +#else +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return false; +} +#endif + +#endif diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h new file mode 100644 index 000000000..fb42659f6 --- /dev/null +++ b/arch/x86/include/asm/frame.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FRAME_H +#define _ASM_X86_FRAME_H + +#include + +/* + * These are stack frame creation macros. They should be used by every + * callable non-leaf asm function to make kernel stack traces more reliable. + */ + +#ifdef CONFIG_FRAME_POINTER + +#ifdef __ASSEMBLY__ + +.macro FRAME_BEGIN + push %_ASM_BP + _ASM_MOV %_ASM_SP, %_ASM_BP +.endm + +.macro FRAME_END + pop %_ASM_BP +.endm + +#ifdef CONFIG_X86_64 +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just setting the LSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts + * the original rbp. + */ +.macro ENCODE_FRAME_POINTER ptregs_offset=0 + leaq 1+\ptregs_offset(%rsp), %rbp +.endm +#else /* !CONFIG_X86_64 */ +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just clearing the MSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the + * original ebp. + */ +.macro ENCODE_FRAME_POINTER + mov %esp, %ebp + andl $0x7fffffff, %ebp +.endm +#endif /* CONFIG_X86_64 */ + +#else /* !__ASSEMBLY__ */ + +#define FRAME_BEGIN \ + "push %" _ASM_BP "\n" \ + _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n" + +#define FRAME_END "pop %" _ASM_BP "\n" + +#ifdef CONFIG_X86_64 + +#define ENCODE_FRAME_POINTER \ + "lea 1(%rsp), %rbp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs + 1; +} + +#else /* !CONFIG_X86_64 */ + +#define ENCODE_FRAME_POINTER \ + "movl %esp, %ebp\n\t" \ + "andl $0x7fffffff, %ebp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs & 0x7fffffff; +} + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLY__ */ + +#define FRAME_OFFSET __ASM_SEL(4, 8) + +#else /* !CONFIG_FRAME_POINTER */ + +#ifdef __ASSEMBLY__ + +.macro ENCODE_FRAME_POINTER ptregs_offset=0 +.endm + +#else /* !__ASSEMBLY */ + +#define ENCODE_FRAME_POINTER + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return 0; +} + +#endif + +#define FRAME_BEGIN +#define FRAME_END +#define FRAME_OFFSET 0 + +#endif /* CONFIG_FRAME_POINTER */ + +#endif /* _ASM_X86_FRAME_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h new file mode 100644 index 000000000..35cff5f2b --- /dev/null +++ b/arch/x86/include/asm/fsgsbase.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FSGSBASE_H +#define _ASM_FSGSBASE_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 + +#include + +/* + * Read/write a task's FSBASE or GSBASE. This returns the value that + * the FS/GS base would have (if the task were to be resumed). These + * work on the current task or on a non-running (typically stopped + * ptrace child) task. + */ +extern unsigned long x86_fsbase_read_task(struct task_struct *task); +extern unsigned long x86_gsbase_read_task(struct task_struct *task); +extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); +extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); + +/* Must be protected by X86_FEATURE_FSGSBASE check. */ + +static __always_inline unsigned long rdfsbase(void) +{ + unsigned long fsbase; + + asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); + + return fsbase; +} + +static __always_inline unsigned long rdgsbase(void) +{ + unsigned long gsbase; + + asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); + + return gsbase; +} + +static __always_inline void wrfsbase(unsigned long fsbase) +{ + asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); +} + +static __always_inline void wrgsbase(unsigned long gsbase) +{ + asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); +} + +#include + +/* Helper functions for reading/writing FS/GS base */ + +static inline unsigned long x86_fsbase_read_cpu(void) +{ + unsigned long fsbase; + + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + fsbase = rdfsbase(); + else + rdmsrl(MSR_FS_BASE, fsbase); + + return fsbase; +} + +static inline void x86_fsbase_write_cpu(unsigned long fsbase) +{ + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + wrfsbase(fsbase); + else + wrmsrl(MSR_FS_BASE, fsbase); +} + +extern unsigned long x86_gsbase_read_cpu_inactive(void); +extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); +extern unsigned long x86_fsgsbase_read_task(struct task_struct *task, + unsigned short selector); + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h new file mode 100644 index 000000000..908d99b12 --- /dev/null +++ b/arch/x86/include/asm/ftrace.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FTRACE_H +#define _ASM_X86_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER +#ifndef CC_USING_FENTRY +# error Compiler does not support fentry? +#endif +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) +#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ + +/* Ignore unused weak functions which will have non zero offsets */ +#ifdef CONFIG_HAVE_FENTRY +# include +/* Add offset for endbr64 if IBT enabled */ +# define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + +#ifndef __ASSEMBLY__ +extern void __fentry__(void); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. + */ + return addr; +} + +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + /* Emulate a call */ + regs->orig_ax = addr; +} + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs * +arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* Only when FL_SAVE_REGS is set, cs will be non zero */ + if (!fregs->regs.cs) + return NULL; + return &fregs->regs; +} + +#define ftrace_instruction_pointer_set(fregs, _ip) \ + do { (fregs)->regs.ip = (_ip); } while (0) + +struct ftrace_ops; +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ + + +#ifndef __ASSEMBLY__ + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Compare the symbol name with the system call name. Skip the + * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix. + */ + return !strcmp(sym + 3, name + 3) || + (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) || + (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3)); +} + +#ifndef COMPILE_OFFSETS + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION) +#include + +/* + * Because ia32 syscalls do not map to x86_64 syscall numbers + * this screws up the trace output when tracing a ia32 task. + * Instead of reporting bogus syscalls, just do not trace them. + * + * If the user really wants these, then they should use the + * raw syscall tracepoints with filtering. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return in_32bit_syscall(); +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ +#endif /* !COMPILE_OFFSETS */ +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h new file mode 100644 index 000000000..99d345b68 --- /dev/null +++ b/arch/x86/include/asm/futex.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FUTEX_H +#define _ASM_X86_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include + +#include +#include +#include +#include + +#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret; \ + asm volatile("1:\t" insn "\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ + : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "0" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) + + +#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret, tem; \ + asm volatile("1:\tmovl %2, %0\n" \ + "2:\tmovl\t%0, %3\n" \ + "\t" insn "\n" \ + "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t2b\n" \ + "4:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ + _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ + : "=&a" (oldval), "=&r" (ret), \ + "+m" (*uaddr), "=&r" (tem) \ + : "r" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) + +static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) +{ + if (!user_access_begin(uaddr, sizeof(u32))) + return -EFAULT; + + switch (op) { + case FUTEX_OP_SET: + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ADD: + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, + uaddr, oparg, Efault); + break; + case FUTEX_OP_OR: + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ANDN: + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); + break; + case FUTEX_OP_XOR: + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); + break; + default: + user_access_end(); + return -ENOSYS; + } + user_access_end(); + return 0; +Efault: + user_access_end(); + return -EFAULT; +} + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + + if (!user_access_begin(uaddr, sizeof(u32))) + return -EFAULT; + asm volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "r" (newval), "1" (oldval) + : "memory" + ); + user_access_end(); + *uval = oldval; + return ret; +} + +#endif +#endif /* _ASM_X86_FUTEX_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h new file mode 100644 index 000000000..5af8088a1 --- /dev/null +++ b/arch/x86/include/asm/gart.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_GART_H +#define _ASM_X86_GART_H + +#include + +extern void set_up_gart_resume(u32, u32); + +extern int fallback_aper_order; +extern int fallback_aper_force; +extern int fix_aperture; + +/* PTE bits. */ +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 + +/* Aperture control register bits. */ +#define GARTEN (1<<0) +#define DISGARTCPU (1<<4) +#define DISGARTIO (1<<5) +#define DISTLBWALKPRB (1<<6) + +/* GART cache control register bits. */ +#define INVGART (1<<0) +#define GARTPTEERR (1<<1) + +/* K8 On-cpu GART registers */ +#define AMD64_GARTAPERTURECTL 0x90 +#define AMD64_GARTAPERTUREBASE 0x94 +#define AMD64_GARTTABLEBASE 0x98 +#define AMD64_GARTCACHECTL 0x9c + +#ifdef CONFIG_GART_IOMMU +extern int gart_iommu_aperture; +extern int gart_iommu_aperture_allowed; +extern int gart_iommu_aperture_disabled; + +extern void early_gart_iommu_check(void); +extern int gart_iommu_init(void); +extern void __init gart_parse_options(char *); +void gart_iommu_hole_init(void); + +#else +#define gart_iommu_aperture 0 +#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture_disabled 1 + +static inline void early_gart_iommu_check(void) +{ +} +static inline void gart_parse_options(char *options) +{ +} +static inline void gart_iommu_hole_init(void) +{ +} +#endif + +extern int agp_amd64_init(void); + +static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order) +{ + u32 ctl; + + /* + * Don't enable translation but enable GART IO and CPU accesses. + * Also, set DISTLBWALKPRB since GART tables memory is UC. + */ + ctl = order << 1; + + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + +static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) +{ + u32 tmp, ctl; + + /* address of the mappings table */ + addr >>= 12; + tmp = (u32) addr<<4; + tmp &= ~0xf; + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp); + + /* Enable GART translation for this hammer. */ + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + ctl |= GARTEN | DISTLBWALKPRB; + ctl &= ~(DISGARTCPU | DISGARTIO); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + +static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) +{ + if (!aper_base) + return 0; + + if (aper_base + aper_size > 0x100000000ULL) { + printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n"); + return 0; + } + if (e820__mapped_any(aper_base, aper_base + aper_size, E820_TYPE_RAM)) { + printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + if (aper_size < min_size) { + printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); + return 0; + } + + return 1; +} + +#endif /* _ASM_X86_GART_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h new file mode 100644 index 000000000..4b8b98fa7 --- /dev/null +++ b/arch/x86/include/asm/genapic.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h new file mode 100644 index 000000000..3c7267ef4 --- /dev/null +++ b/arch/x86/include/asm/geode.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Geode definitions + * Copyright (C) 2006, Advanced Micro Devices, Inc. + */ + +#ifndef _ASM_X86_GEODE_H +#define _ASM_X86_GEODE_H + +#include +#include +#include + +static inline int is_geode_gx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 5)); +} + +static inline int is_geode_lx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 10)); +} + +static inline int is_geode(void) +{ + return (is_geode_gx() || is_geode_lx()); +} + +#endif /* _ASM_X86_GEODE_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h new file mode 100644 index 000000000..275e7fd20 --- /dev/null +++ b/arch/x86/include/asm/hardirq.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include + +typedef struct { + u16 __softirq_pending; +#if IS_ENABLED(CONFIG_KVM_INTEL) + u8 kvm_cpu_l1tf_flush_l1d; +#endif + unsigned int __nmi_count; /* arch dependent */ +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; + unsigned int icr_read_retry_count; +#endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; + unsigned int kvm_posted_intr_wakeup_ipis; + unsigned int kvm_posted_intr_nested_ipis; +#endif + unsigned int x86_platform_ipis; /* arch dependent */ + unsigned int apic_perf_irqs; + unsigned int apic_irq_work_irqs; +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif + unsigned int irq_tlb_count; +#ifdef CONFIG_X86_THERMAL_VECTOR + unsigned int irq_thermal_count; +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD + unsigned int irq_threshold_count; +#endif +#ifdef CONFIG_X86_MCE_AMD + unsigned int irq_deferred_error_count; +#endif +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR + unsigned int irq_hv_callback_count; +#endif +#if IS_ENABLED(CONFIG_HYPERV) + unsigned int irq_hv_reenlightenment_count; + unsigned int hyperv_stimer0_count; +#endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) this_cpu_inc(irq_stat.member) + +extern void ack_bad_irq(unsigned int irq); + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat + + +#if IS_ENABLED(CONFIG_KVM_INTEL) +static inline void kvm_set_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); +} + +static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); +} + +static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void) +{ + return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); +} +#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ +static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h new file mode 100644 index 000000000..731ee7cc4 --- /dev/null +++ b/arch/x86/include/asm/highmem.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#ifndef _ASM_X86_HIGHMEM_H +#define _ASM_X86_HIGHMEM_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +/* + * Ordering is: + * + * high memory on: high_memory off: + * FIXADDR_TOP FIXADDR_TOP + * fixed addresses fixed addresses + * FIXADDR_START FIXADDR_START + * temp fixed addresses/persistent kmap area VMALLOC_END + * PKMAP_BASE temp fixed addresses/vmalloc area + * VMALLOC_END VMALLOC_START + * vmalloc area high_memory + * VMALLOC_START + * high_memory + * + * The temp fixed area is only used during boot for early_ioremap(), and + * it is unused when the ioremap() is functional. vmalloc/pkmap area become + * available after early boot so the temp fixed area is available for re-use. + */ +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define flush_cache_kmaps() do { } while (0) + +#define arch_kmap_local_post_map(vaddr, pteval) \ + arch_flush_lazy_mmu_mode() + +#define arch_kmap_local_post_unmap(vaddr) \ + do { \ + flush_tlb_one_kernel((vaddr)); \ + arch_flush_lazy_mmu_mode(); \ + } while (0) + +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h new file mode 100644 index 000000000..ab9f3dd87 --- /dev/null +++ b/arch/x86/include/asm/hpet.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HPET_H +#define _ASM_X86_HPET_H + +#include + +#ifdef CONFIG_HPET_TIMER + +#define HPET_MMAP_SIZE 1024 + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 + +#define HPET_Tn_CFG(n) (0x100 + 0x20 * n) +#define HPET_Tn_CMP(n) (0x108 + 0x20 * n) +#define HPET_Tn_ROUTE(n) (0x110 + 0x20 * n) + +#define HPET_T0_CFG 0x100 +#define HPET_T0_CMP 0x108 +#define HPET_T0_ROUTE 0x110 +#define HPET_T1_CFG 0x120 +#define HPET_T1_CMP 0x128 +#define HPET_T1_ROUTE 0x130 +#define HPET_T2_CFG 0x140 +#define HPET_T2_CMP 0x148 +#define HPET_T2_ROUTE 0x150 + +#define HPET_ID_REV 0x000000ff +#define HPET_ID_NUMBER 0x00001f00 +#define HPET_ID_64BIT 0x00002000 +#define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_VENDOR 0xffff0000 +#define HPET_ID_NUMBER_SHIFT 8 +#define HPET_ID_VENDOR_SHIFT 16 + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 +#define HPET_LEGACY_8254 2 +#define HPET_LEGACY_RTC 8 + +#define HPET_TN_LEVEL 0x0002 +#define HPET_TN_ENABLE 0x0004 +#define HPET_TN_PERIODIC 0x0008 +#define HPET_TN_PERIODIC_CAP 0x0010 +#define HPET_TN_64BIT_CAP 0x0020 +#define HPET_TN_SETVAL 0x0040 +#define HPET_TN_32BIT 0x0100 +#define HPET_TN_ROUTE 0x3e00 +#define HPET_TN_FSB 0x4000 +#define HPET_TN_FSB_CAP 0x8000 +#define HPET_TN_ROUTE_SHIFT 9 + +/* Max HPET Period is 10^8 femto sec as in HPET spec */ +#define HPET_MAX_PERIOD 100000000UL +/* + * Min HPET period is 10^5 femto sec just for safety. If it is less than this, + * then 32 bit HPET counter wrapsaround in less than 0.5 sec. + */ +#define HPET_MIN_PERIOD 100000UL + +/* hpet memory map physical address */ +extern unsigned long hpet_address; +extern unsigned long force_hpet_address; +extern bool boot_hpet_disable; +extern u8 hpet_blockid; +extern bool hpet_force_user; +extern bool hpet_msi_disable; +extern int is_hpet_enabled(void); +extern int hpet_enable(void); +extern void hpet_disable(void); +extern unsigned int hpet_readl(unsigned int a); +extern void force_hpet_resume(void); + +#ifdef CONFIG_HPET_EMULATE_RTC + +#include + +typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie); +extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, + unsigned char sec); +extern int hpet_set_periodic_freq(unsigned long freq); +extern int hpet_rtc_dropped_irq(void); +extern int hpet_rtc_timer_init(void); +extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); +extern int hpet_register_irq_handler(rtc_irq_handler handler); +extern void hpet_unregister_irq_handler(rtc_irq_handler handler); + +#endif /* CONFIG_HPET_EMULATE_RTC */ + +#else /* CONFIG_HPET_TIMER */ + +static inline int hpet_enable(void) { return 0; } +static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 +#define default_setup_hpet_msi NULL + +#endif +#endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h new file mode 100644 index 000000000..1721b1aad --- /dev/null +++ b/arch/x86/include/asm/hugetlb.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HUGETLB_H +#define _ASM_X86_HUGETLB_H + +#include +#include + +#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) + +#endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000..0bc931cd0 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#include + +#define __ARCH_HW_BREAKPOINT_H + +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ +struct arch_hw_breakpoint { + unsigned long address; + unsigned long mask; + u8 len; + u8 type; +}; + +#include +#include +#include + +/* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c + +#ifdef CONFIG_X86_64 +#define X86_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define X86_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define X86_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define X86_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +#define hw_breakpoint_slots(type) (HBP_NUM) + +struct perf_event_attr; +struct perf_event; +struct pmu; + +extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +extern int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + +#endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h new file mode 100644 index 000000000..d465ece58 --- /dev/null +++ b/arch/x86/include/asm/hw_irq.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HW_IRQ_H +#define _ASM_X86_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#include + +#define IRQ_MATRIX_BITS NR_VECTORS + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +struct irq_data; +struct pci_dev; +struct msi_desc; + +enum irq_alloc_type { + X86_IRQ_ALLOC_TYPE_IOAPIC = 1, + X86_IRQ_ALLOC_TYPE_HPET, + X86_IRQ_ALLOC_TYPE_PCI_MSI, + X86_IRQ_ALLOC_TYPE_PCI_MSIX, + X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_AMDVI, + X86_IRQ_ALLOC_TYPE_UV, +}; + +struct ioapic_alloc_info { + int pin; + int node; + u32 is_level : 1; + u32 active_low : 1; + u32 valid : 1; +}; + +struct uv_alloc_info { + int limit; + int blade; + unsigned long offset; + char *name; + +}; + +/** + * irq_alloc_info - X86 specific interrupt allocation info + * @type: X86 specific allocation type + * @flags: Flags for allocation tweaks + * @devid: Device ID for allocations + * @hwirq: Associated hw interrupt number in the domain + * @mask: CPU mask for vector allocation + * @desc: Pointer to msi descriptor + * @data: Allocation specific data + * + * @ioapic: IOAPIC specific allocation data + * @uv: UV specific allocation data +*/ +struct irq_alloc_info { + enum irq_alloc_type type; + u32 flags; + u32 devid; + irq_hw_number_t hwirq; + const struct cpumask *mask; + struct msi_desc *desc; + void *data; + + union { + struct ioapic_alloc_info ioapic; + struct uv_alloc_info uv; + }; +}; + +struct irq_cfg { + unsigned int dest_apicid; + unsigned int vector; +}; + +extern struct irq_cfg *irq_cfg(unsigned int irq); +extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +#ifdef CONFIG_SMP +extern void send_cleanup_vector(struct irq_cfg *); +extern void irq_complete_move(struct irq_cfg *cfg); +#else +static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void irq_complete_move(struct irq_cfg *c) { } +#endif + +extern void apic_ack_edge(struct irq_data *data); +#else /* CONFIG_X86_LOCAL_APIC */ +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +#endif /* CONFIG_X86_LOCAL_APIC */ + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern void elcr_set_level_irq(unsigned int irq); + +extern char irq_entries_start[]; +#ifdef CONFIG_TRACING +#define trace_irq_entries_start irq_entries_start +#endif + +extern char spurious_entries_start[]; + +#define VECTOR_UNUSED NULL +#define VECTOR_SHUTDOWN ((void *)-1L) +#define VECTOR_RETRIGGERED ((void *)-2L) + +typedef struct irq_desc* vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); + +#endif /* !ASSEMBLY_ */ + +#endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h new file mode 100644 index 000000000..c5e0e5a06 --- /dev/null +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -0,0 +1,676 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains definitions from Hyper-V Hypervisor Top-Level Functional + * Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + */ + +#ifndef _ASM_X86_HYPERV_TLFS_H +#define _ASM_X86_HYPERV_TLFS_H + +#include +#include +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). + */ +#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 +#define HYPERV_CPUID_INTERFACE 0x40000001 +#define HYPERV_CPUID_VERSION 0x40000002 +#define HYPERV_CPUID_FEATURES 0x40000003 +#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 +#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C + +#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081 +#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */ + +#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082 +/* Support for the extended IOAPIC RTE format */ +#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2) + +#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 +#define HYPERV_CPUID_MIN 0x40000005 +#define HYPERV_CPUID_MAX 0x4000ffff + +/* + * Group D Features. The bit assignments are custom to each architecture. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits. + */ +/* The MWAIT instruction is available (per section MONITOR / MWAIT) */ +#define HV_X64_MWAIT_AVAILABLE BIT(0) +/* Guest debugging support is available */ +#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) +/* Performance Monitor support is available*/ +#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) +/* Support for physical CPU dynamic partitioning events is available*/ +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) +/* + * Support for passing hypercall input parameter block via XMM + * registers is available + */ +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) +/* Support for a virtual guest idle state is available */ +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) +/* Frequency MSRs available */ +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +/* Support for debug MSRs available */ +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +/* + * Support for returning hypercall output block via XMM + * registers is available + */ +#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15) +/* stimer Direct Mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) + +/* + * Implementation recommendations. Indicates which behaviors the hypervisor + * recommends the OS implement for optimal performance. + * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. + */ +/* + * Recommend using hypercall for address space switches rather + * than MOV to CR3 instruction + */ +#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) +/* Recommend using hypercall for local TLB flushes rather + * than INVLPG or MOV to CR3 instructions */ +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) +/* + * Recommend using hypercall for remote TLB flushes rather + * than inter-processor interrupts + */ +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) +/* + * Recommend using MSRs for accessing APIC registers + * EOI, ICR and TPR rather than their memory-mapped counterparts + */ +#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) +/* Recommend using the hypervisor-provided MSR to initiate a system RESET */ +#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) +/* + * Recommend using relaxed timing for this partition. If used, + * the VM should disable any watchdog timeouts that rely on the + * timely delivery of external interrupts + */ +#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) + +/* + * Recommend not using Auto End-Of-Interrupt feature + */ +#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) + +/* + * Recommend using cluster IPI hypercalls. + */ +#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) + +/* Recommend using the newer ExProcessorMasks interface */ +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) + +/* Recommend using enlightened VMCS */ +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) + +/* + * CPU management features identification. + * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. + */ +#define HV_X64_START_LOGICAL_PROCESSOR BIT(0) +#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) +#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) +#define HV_X64_RESERVED_IDENTITY_BIT BIT(31) + +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + +/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) +#define HV_X64_NESTED_MSR_BITMAP BIT(19) + +/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0) + +/* + * This is specific to AMD and specifies that enlightened TLB flush is + * supported. If guest opts in to this feature, ASID invalidations only + * flushes gva -> hpa mapping entries. To flush the TLB entries derived + * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace + * or HvFlushGuestPhysicalAddressList). + */ +#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22) + +/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ +#define HV_PARAVISOR_PRESENT BIT(0) + +/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */ +#define HV_ISOLATION_TYPE GENMASK(3, 0) +#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) +#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) + +enum hv_isolation_type { + HV_ISOLATION_TYPE_NONE = 0, + HV_ISOLATION_TYPE_VBS = 1, + HV_ISOLATION_TYPE_SNP = 2 +}; + +/* Hyper-V specific model specific registers (MSRs) */ + +/* MSR used to identify the guest OS. */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 + +/* MSR used to setup pages used to communicate with the hypervisor. */ +#define HV_X64_MSR_HYPERCALL 0x40000001 + +/* MSR used to provide vcpu index */ +#define HV_REGISTER_VP_INDEX 0x40000002 + +/* MSR used to reset the guest OS. */ +#define HV_X64_MSR_RESET 0x40000003 + +/* MSR used to provide vcpu runtime in 100ns units */ +#define HV_X64_MSR_VP_RUNTIME 0x40000010 + +/* MSR used to read the per-partition time reference counter */ +#define HV_REGISTER_TIME_REF_COUNT 0x40000020 + +/* A partition's reference time stamp counter (TSC) page */ +#define HV_REGISTER_REFERENCE_TSC 0x40000021 + +/* MSR used to retrieve the TSC frequency */ +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 + +/* MSR used to retrieve the local APIC timer frequency */ +#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 + +/* Define the virtual APIC registers */ +#define HV_X64_MSR_EOI 0x40000070 +#define HV_X64_MSR_ICR 0x40000071 +#define HV_X64_MSR_TPR 0x40000072 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 + +/* Define synthetic interrupt controller model specific registers. */ +#define HV_REGISTER_SCONTROL 0x40000080 +#define HV_REGISTER_SVERSION 0x40000081 +#define HV_REGISTER_SIEFP 0x40000082 +#define HV_REGISTER_SIMP 0x40000083 +#define HV_REGISTER_EOM 0x40000084 +#define HV_REGISTER_SINT0 0x40000090 +#define HV_REGISTER_SINT1 0x40000091 +#define HV_REGISTER_SINT2 0x40000092 +#define HV_REGISTER_SINT3 0x40000093 +#define HV_REGISTER_SINT4 0x40000094 +#define HV_REGISTER_SINT5 0x40000095 +#define HV_REGISTER_SINT6 0x40000096 +#define HV_REGISTER_SINT7 0x40000097 +#define HV_REGISTER_SINT8 0x40000098 +#define HV_REGISTER_SINT9 0x40000099 +#define HV_REGISTER_SINT10 0x4000009A +#define HV_REGISTER_SINT11 0x4000009B +#define HV_REGISTER_SINT12 0x4000009C +#define HV_REGISTER_SINT13 0x4000009D +#define HV_REGISTER_SINT14 0x4000009E +#define HV_REGISTER_SINT15 0x4000009F + +/* + * Synthetic Timer MSRs. Four timers per vcpu. + */ +#define HV_REGISTER_STIMER0_CONFIG 0x400000B0 +#define HV_REGISTER_STIMER0_COUNT 0x400000B1 +#define HV_REGISTER_STIMER1_CONFIG 0x400000B2 +#define HV_REGISTER_STIMER1_COUNT 0x400000B3 +#define HV_REGISTER_STIMER2_CONFIG 0x400000B4 +#define HV_REGISTER_STIMER2_COUNT 0x400000B5 +#define HV_REGISTER_STIMER3_CONFIG 0x400000B6 +#define HV_REGISTER_STIMER3_COUNT 0x400000B7 + +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + +/* Hyper-V guest crash notification MSR's */ +#define HV_REGISTER_CRASH_P0 0x40000100 +#define HV_REGISTER_CRASH_P1 0x40000101 +#define HV_REGISTER_CRASH_P2 0x40000102 +#define HV_REGISTER_CRASH_P3 0x40000103 +#define HV_REGISTER_CRASH_P4 0x40000104 +#define HV_REGISTER_CRASH_CTL 0x40000105 + +/* TSC emulation after migration */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 + +/* TSC invariant control */ +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + +/* Register name aliases for temporary compatibility */ +#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT +#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG +#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT +#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG +#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT +#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG +#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT +#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG +#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL +#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION +#define HV_X64_MSR_SIMP HV_REGISTER_SIMP +#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP +#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX +#define HV_X64_MSR_EOM HV_REGISTER_EOM +#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0 +#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15 +#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0 +#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1 +#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2 +#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3 +#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4 +#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL +#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT +#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC + +/* Hyper-V memory host visibility */ +enum hv_mem_host_visibility { + VMBUS_PAGE_NOT_VISIBLE = 0, + VMBUS_PAGE_VISIBLE_READ_ONLY = 1, + VMBUS_PAGE_VISIBLE_READ_WRITE = 3 +}; + +/* HvCallModifySparseGpaPageHostVisibility hypercall */ +#define HV_MAX_MODIFY_GPA_REP_COUNT ((PAGE_SIZE / sizeof(u64)) - 2) +struct hv_gpa_range_for_visibility { + u64 partition_id; + u32 host_visibility:2; + u32 reserved0:30; + u32 reserved1; + u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT]; +} __packed; + +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + } __packed; +}; + +union hv_vp_assist_msr_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 pfn:52; + } __packed; +}; + +struct hv_reenlightenment_control { + __u64 vector:8; + __u64 reserved1:8; + __u64 enabled:1; + __u64 reserved2:15; + __u64 target_vp:32; +} __packed; + +struct hv_tsc_emulation_control { + __u64 enabled:1; + __u64 reserved:63; +} __packed; + +struct hv_tsc_emulation_status { + __u64 inprogress:1; + __u64 reserved:63; +} __packed; + +#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) + +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +/* Hyper-V Enlightened VMCS version mask in nested features CPUID */ +#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff + +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + +/* Number of XMM registers used in hypercall input/output */ +#define HV_HYPERCALL_MAX_XMM_REGISTERS 6 + +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; + __u64 current_nested_vmcs; +} __packed; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u16 padding16_1; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16_2[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 padding32_1; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u32 padding32_2; + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 guest_ia32_perf_global_ctrl; + u64 guest_ia32_s_cet; + u64 guest_ssp; + u64 guest_ia32_int_ssp_table_addr; + u64 guest_ia32_lbr_ctl; + u64 padding64_5[2]; + u64 xss_exit_bitmap; + u64 encls_exiting_bitmap; + u64 host_ia32_perf_global_ctrl; + u64 tsc_multiplier; + u64 host_ia32_s_cet; + u64 host_ssp; + u64 host_ia32_int_ssp_table_addr; + u64 padding64_6; +} __packed; + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + +/* + * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose + * SVM enlightenments to guests. + */ +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB. + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS 31 + +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; + +enum hv_interrupt_type { + HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, + HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, + HV_X64_INTERRUPT_TYPE_SMI = 0x0002, + HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, + HV_X64_INTERRUPT_TYPE_NMI = 0x0004, + HV_X64_INTERRUPT_TYPE_INIT = 0x0005, + HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, + HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, + HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, + HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, + HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, +}; + +union hv_msi_address_register { + u32 as_uint32; + struct { + u32 reserved1:2; + u32 destination_mode:1; + u32 redirection_hint:1; + u32 reserved2:8; + u32 destination_id:8; + u32 msi_base:12; + }; +} __packed; + +union hv_msi_data_register { + u32 as_uint32; + struct { + u32 vector:8; + u32 delivery_mode:3; + u32 reserved1:3; + u32 level_assert:1; + u32 trigger_mode:1; + u32 reserved2:16; + }; +} __packed; + +/* HvRetargetDeviceInterrupt hypercall */ +union hv_msi_entry { + u64 as_uint64; + struct { + union hv_msi_address_register address; + union hv_msi_data_register data; + } __packed; +}; + +#include + +#endif diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h new file mode 100644 index 000000000..e41cbf2ec --- /dev/null +++ b/arch/x86/include/asm/hypervisor.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2008, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ASM_X86_HYPERVISOR_H +#define _ASM_X86_HYPERVISOR_H + +/* x86 hypervisor types */ +enum x86_hypervisor_type { + X86_HYPER_NATIVE = 0, + X86_HYPER_VMWARE, + X86_HYPER_MS_HYPERV, + X86_HYPER_XEN_PV, + X86_HYPER_XEN_HVM, + X86_HYPER_KVM, + X86_HYPER_JAILHOUSE, + X86_HYPER_ACRN, +}; + +#ifdef CONFIG_HYPERVISOR_GUEST + +#include +#include +#include + +struct hypervisor_x86 { + /* Hypervisor name */ + const char *name; + + /* Detection routine */ + uint32_t (*detect)(void); + + /* Hypervisor type */ + enum x86_hypervisor_type type; + + /* init time callbacks */ + struct x86_hyper_init init; + + /* runtime callbacks */ + struct x86_hyper_runtime runtime; + + /* ignore nopv parameter */ + bool ignore_nopv; +}; + +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_pv; +extern const struct hypervisor_x86 x86_hyper_kvm; +extern const struct hypervisor_x86 x86_hyper_jailhouse; +extern const struct hypervisor_x86 x86_hyper_acrn; +extern struct hypervisor_x86 x86_hyper_xen_hvm; + +extern bool nopv; +extern enum x86_hypervisor_type x86_hyper_type; +extern void init_hypervisor_platform(void); +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return x86_hyper_type == type; +} +#else +static inline void init_hypervisor_platform(void) { } +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return type == X86_HYPER_NATIVE; +} +#endif /* CONFIG_HYPERVISOR_GUEST */ +#endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h new file mode 100644 index 000000000..c715097e9 --- /dev/null +++ b/arch/x86/include/asm/i8259.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_I8259_H +#define _ASM_X86_I8259_H + +#include +#include + +extern unsigned int cached_irq_mask; + +#define __byte(x, y) (((unsigned char *)&(y))[x]) +#define cached_master_mask (__byte(0, cached_irq_mask)) +#define cached_slave_mask (__byte(1, cached_irq_mask)) + +/* i8259A PIC registers */ +#define PIC_MASTER_CMD 0x20 +#define PIC_MASTER_IMR 0x21 +#define PIC_MASTER_ISR PIC_MASTER_CMD +#define PIC_MASTER_POLL PIC_MASTER_ISR +#define PIC_MASTER_OCW3 PIC_MASTER_ISR +#define PIC_SLAVE_CMD 0xa0 +#define PIC_SLAVE_IMR 0xa1 +#define PIC_ELCR1 0x4d0 +#define PIC_ELCR2 0x4d1 + +/* i8259A PIC related value */ +#define PIC_CASCADE_IR 2 +#define MASTER_ICW4_DEFAULT 0x01 +#define SLAVE_ICW4_DEFAULT 0x01 +#define PIC_ICW4_AEOI 2 + +extern raw_spinlock_t i8259A_lock; + +/* the PIC may need a careful delay on some platforms, hence specific calls */ +static inline unsigned char inb_pic(unsigned int port) +{ + unsigned char value = inb(port); + + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); + + return value; +} + +static inline void outb_pic(unsigned char value, unsigned int port) +{ + outb(value, port); + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); +} + +extern struct irq_chip i8259A_chip; + +struct legacy_pic { + int nr_legacy_irqs; + struct irq_chip *chip; + void (*mask)(unsigned int irq); + void (*unmask)(unsigned int irq); + void (*mask_all)(void); + void (*restore_mask)(void); + void (*init)(int auto_eoi); + int (*probe)(void); + int (*irq_pending)(unsigned int irq); + void (*make_irq)(unsigned int irq); +}; + +void legacy_pic_pcat_compat(void); + +extern struct legacy_pic *legacy_pic; +extern struct legacy_pic null_legacy_pic; + +static inline bool has_legacy_pic(void) +{ + return legacy_pic != &null_legacy_pic; +} + +static inline int nr_legacy_irqs(void) +{ + return legacy_pic->nr_legacy_irqs; +} + +#endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h new file mode 100644 index 000000000..980562947 --- /dev/null +++ b/arch/x86/include/asm/ia32.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IA32_H +#define _ASM_X86_IA32_H + + +#ifdef CONFIG_IA32_EMULATION + +#include + +/* + * 32 bit structures for IA32 support. + */ + +#include + +/* signal.h */ + +struct ucontext_ia32 { + unsigned int uc_flags; + unsigned int uc_link; + compat_stack_t uc_stack; + struct sigcontext_32 uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +/* This matches struct stat64 in glibc2.2, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned int __st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned long long st_rdev; + unsigned char __pad3[4]; + + long long st_size; + unsigned int st_blksize; + + long long st_blocks;/* Number 512-byte blocks allocated */ + + unsigned st_atime; + unsigned st_atime_nsec; + unsigned st_mtime; + unsigned st_mtime_nsec; + unsigned st_ctime; + unsigned st_ctime_nsec; + + unsigned long long st_ino; +} __attribute__((packed)); + +#define IA32_STACK_TOP IA32_PAGE_OFFSET + +#ifdef __KERNEL__ +struct linux_binprm; +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); +struct mm_struct; +extern void ia32_pick_mmap_layout(struct mm_struct *mm); + +#endif + +extern bool __ia32_enabled; + +static inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + +#else /* !CONFIG_IA32_EMULATION */ + +static inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +static inline void ia32_disable(void) {} + +#endif + +#endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h new file mode 100644 index 000000000..aa065c94c --- /dev/null +++ b/arch/x86/include/asm/ia32_unistd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IA32_UNISTD_H +#define _ASM_X86_IA32_UNISTD_H + +/* + * This file contains the system call numbers of the ia32 compat ABI, + * this is for the kernel only. + */ +#define __SYSCALL_ia32_NR(x) (x) +#include + +#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000..9b08082a5 --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_ibt_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + +static inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it + * will be unique to (former) ENDBR sites. + */ + return 0x001f0f66; /* osp nopl (%rax) */ +} + +static inline bool is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr u64 ibt_save(void); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLY__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLY__ + +#define ASM_ENDBR +#define IBT_NOSEAL(name) + +#define __noendbr + +static inline bool is_endbr(u32 val) { return false; } + +static inline u64 ibt_save(void) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLY__ */ + +#define ENDBR + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h new file mode 100644 index 000000000..fca710a93 --- /dev/null +++ b/arch/x86/include/asm/idtentry.h @@ -0,0 +1,709 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IDTENTRY_H +#define _ASM_X86_IDTENTRY_H + +/* Interrupts/Exceptions */ +#include + +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + +#ifndef __ASSEMBLY__ +#include +#include + +#include + +/** + * DECLARE_IDTENTRY - Declare functions for simple IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it + * declares the entry points for usage in C code. There is an ASM variant + * as well which is used to emit the entry stubs in entry_32/64.S. + */ +#define DECLARE_IDTENTRY(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY - Emit code for simple IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * irqentry_enter() contains common code which has to be invoked before + * arbitrary code in the body. irqentry_exit() contains common code + * which has to run before returning to the low level assembly code. + */ +#define DEFINE_IDTENTRY(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/* Special case for 32bit IRET 'trap' */ +#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY +#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the + * C-handler. + */ +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points + * Error code pushed by hardware + * @func: Function name of the entry point + * + * Same as DEFINE_IDTENTRY, but has an extra error_code argument + */ +#define DEFINE_IDTENTRY_ERRORCODE(func) \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs, error_code); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code) + +/** + * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW(func) \ +__visible noinstr void func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the + * irqentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ +__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry + * points (common/spurious) + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points + * @func: Function name of the entry point + * + * The vector number is pushed by the low level entry stub and handed + * to the function as error_code argument which needs to be truncated + * to an u8 because the push is sign extending. + * + * irq_enter/exit_rcu() are invoked before the function body and the + * KVM L1D flush request is set. Stack switching to the interrupt stack + * has to be done in the function body if necessary. + */ +#define DEFINE_IDTENTRY_IRQ(func) \ +static void __##func(struct pt_regs *regs, u32 vector); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + u32 vector = (u32)(u8)error_code; \ + \ + instrumentation_begin(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + run_irq_on_irqstack_cond(__##func, regs, vector); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs, u32 vector) + +/** + * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points + * @func: Function name of the entry point + * + * irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the + * function body. KVM L1D flush request is set. + * + * Runs the function on the interrupt stack if the entry hit kernel mode + */ +#define DEFINE_IDTENTRY_SYSVEC(func) \ +static void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + run_sysvec_on_irqstack_cond(__##func, regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT + * entry points + * @func: Function name of the entry point + * + * Runs the function on the interrupted stack. No switch to IRQ stack and + * only the minimal __irq_enter/exit() handling. + * + * Only use for 'empty' vectors like reschedule IPI and KVM posted + * interrupt vectors. + */ +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __irq_enter_raw(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs); \ + __irq_exit_raw(); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#ifdef CONFIG_X86_64 +/** + * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler + * which is called from the ASM entry point on user mode entry + */ +#define DECLARE_IDTENTRY_IST(vector, func) \ + DECLARE_IDTENTRY_RAW(vector, func); \ + __visible void noist_##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the + * safe_stack C handler. + */ +#define DECLARE_IDTENTRY_VC(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_IST - Emit code for IST entry points + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW + */ +#define DEFINE_IDTENTRY_IST(func) \ + DEFINE_IDTENTRY_RAW(func) + +/** + * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which + * belong to a IST entry point (MCE, DB) + * @func: Function name of the entry point. Must be the same as + * the function name of the corresponding IST variant + * + * Maps to DEFINE_IDTENTRY_RAW(). + */ +#define DEFINE_IDTENTRY_NOIST(func) \ + DEFINE_IDTENTRY_RAW(noist_##func) + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_DF(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(func) + +/** + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + when raised from kernel mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) + +/** + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + when raised from user mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) + +#else /* CONFIG_X86_64 */ + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares two functions: + * - The ASM entry point: asm_##func + * - The C handler called from the C shim + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + asmlinkage void asm_##func(void); \ + __visible void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit + * @func: Function name of the entry point + * + * This is called through the doublefault shim which already provides + * cr2 in the address argument. + */ +#define DEFINE_IDTENTRY_DF(func) \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +#endif /* !CONFIG_X86_64 */ + +/* C-Code mapping */ +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW + +#ifdef CONFIG_X86_64 +#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST + +#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST +#endif + +#else /* !__ASSEMBLY__ */ + +/* + * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. + */ +#define DECLARE_IDTENTRY(vector, func) \ + idtentry vector asm_##func func has_error_code=0 + +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + idtentry vector asm_##func func has_error_code=1 + +/* Special case for 32bit IRET 'trap'. Do not emit ASM code */ +#define DECLARE_IDTENTRY_SW(vector, func) + +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/* Entries for common/spurious (device) interrupts */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + idtentry_irq vector func + +/* System vector entries */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + idtentry_sysvec vector func + +#ifdef CONFIG_X86_64 +# define DECLARE_IDTENTRY_MCE(vector, func) \ + idtentry_mce_db vector asm_##func func + +# define DECLARE_IDTENTRY_DEBUG(vector, func) \ + idtentry_mce_db vector asm_##func func + +# define DECLARE_IDTENTRY_DF(vector, func) \ + idtentry_df vector asm_##func func + +# define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +# define DECLARE_IDTENTRY_VC(vector, func) \ + idtentry_vc vector asm_##func func + +#else +# define DECLARE_IDTENTRY_MCE(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/* No ASM emitted for DF as this goes through a C shim */ +# define DECLARE_IDTENTRY_DF(vector, func) + +/* No ASM emitted for XEN hypervisor callback */ +# define DECLARE_IDTENTRY_XENCB(vector, func) + +#endif + +/* No ASM code emitted for NMI */ +#define DECLARE_IDTENTRY_NMI(vector, func) + +/* + * ASM code to emit the common vector entry stubs where each stub is + * packed into IDT_ALIGN bytes. + * + * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because + * GCC treats the local vector variable as unsigned int and would expand + * all vectors above 0x7F to a 5 byte push. The original code did an + * adjustment of the vector number to be in the signed byte range to avoid + * this. While clever it's mindboggling counterintuitive and requires the + * odd conversion back to a real vector number in the C entry points. Using + * .byte achieves the same thing and the only fixup needed in the C entry + * point is to mask off the bits above bit 7 because the push is sign + * extending. + */ + .align IDT_ALIGN +SYM_CODE_START(irq_entries_start) + vector=FIRST_EXTERNAL_VECTOR + .rept NR_EXTERNAL_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_common_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(irq_entries_start) + +#ifdef CONFIG_X86_LOCAL_APIC + .align IDT_ALIGN +SYM_CODE_START(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept NR_SYSTEM_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_spurious_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(spurious_entries_start) +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * The actual entry points. Note that DECLARE_IDTENTRY*() serves two + * purposes: + * - provide the function declarations when included from C-Code + * - emit the ASM stubs when included from entry_32/64.S + * + * This avoids duplicate defines and ensures that everything is consistent. + */ + +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistles. + */ +#define X86_TRAP_OTHER 0xFFFF + +/* Simple exception entry points. No hardware error code */ +DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); +DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); +DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); +DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); +DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); +DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); +DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); + +/* 32bit software IRET trap. Do not emit ASM code */ +DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error); + +/* Simple exception entries with error code pushed by hardware */ +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); + +/* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); + +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + +#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); +#endif +#endif + +/* NMI */ + +#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) +/* + * Special NOIST entry point for VMX which invokes this on the kernel + * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI + * 'executing' marker. + * + * On 32bit this just uses the regular NMI entry point because 32-bit does + * not have ISTs. + */ +DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist); +#else +#define asm_exc_nmi_noist asm_exc_nmi +#endif + +DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); +#endif + +/* #DB */ +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); +#endif + +/* #DF */ +DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); +#endif + +/* #CP */ +#ifdef CONFIG_X86_KERNEL_IBT +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + +/* #VC */ +#ifdef CONFIG_AMD_MEM_ENCRYPT +DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); +#endif + +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap); +#endif + +#ifdef CONFIG_INTEL_TDX_GUEST +DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception); +#endif + +/* Device interrupts common/spurious */ +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); +#endif + +/* System vector entry points */ +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); +DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); +DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +#endif + +#ifdef CONFIG_SMP +DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); +DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); +DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +# ifdef CONFIG_X86_MCE_THRESHOLD +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# endif + +# ifdef CONFIG_X86_MCE_AMD +DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# endif + +# ifdef CONFIG_X86_THERMAL_VECTOR +DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# endif + +# ifdef CONFIG_IRQ_WORK +DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# endif +#endif + +#ifdef CONFIG_HAVE_KVM +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#endif + +#if IS_ENABLED(CONFIG_HYPERV) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); +DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +#endif + +#if IS_ENABLED(CONFIG_ACRN_GUEST) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +#endif + +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + +#ifdef CONFIG_KVM_GUEST +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt); +#endif + +#undef X86_TRAP_OTHER + +#endif diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h new file mode 100644 index 000000000..0d1dbf235 --- /dev/null +++ b/arch/x86/include/asm/imr.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * imr.h: Isolated Memory Region API + * + * Copyright(c) 2013 Intel Corporation. + * Copyright(c) 2015 Bryan O'Donoghue + */ +#ifndef _IMR_H +#define _IMR_H + +#include + +/* + * IMR agent access mask bits + * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register + * definitions. + */ +#define IMR_ESRAM_FLUSH BIT(31) +#define IMR_CPU_SNOOP BIT(30) /* Applicable only to write */ +#define IMR_RMU BIT(29) +#define IMR_VC1_SAI_ID3 BIT(15) +#define IMR_VC1_SAI_ID2 BIT(14) +#define IMR_VC1_SAI_ID1 BIT(13) +#define IMR_VC1_SAI_ID0 BIT(12) +#define IMR_VC0_SAI_ID3 BIT(11) +#define IMR_VC0_SAI_ID2 BIT(10) +#define IMR_VC0_SAI_ID1 BIT(9) +#define IMR_VC0_SAI_ID0 BIT(8) +#define IMR_CPU_0 BIT(1) /* SMM mode */ +#define IMR_CPU BIT(0) /* Non SMM mode */ +#define IMR_ACCESS_NONE 0 + +/* + * Read/Write access-all bits here include some reserved bits + * These are the values firmware uses and are accepted by hardware. + * The kernel defines read/write access-all in the same way as firmware + * in order to have a consistent and crisp definition across firmware, + * bootloader and kernel. + */ +#define IMR_READ_ACCESS_ALL 0xBFFFFFFF +#define IMR_WRITE_ACCESS_ALL 0xFFFFFFFF + +/* Number of IMRs provided by Quark X1000 SoC */ +#define QUARK_X1000_IMR_MAX 0x08 +#define QUARK_X1000_IMR_REGBASE 0x40 + +/* IMR alignment bits - only bits 31:10 are checked for IMR validity */ +#define IMR_ALIGN 0x400 +#define IMR_MASK (IMR_ALIGN - 1) + +int imr_add_range(phys_addr_t base, size_t size, + unsigned int rmask, unsigned int wmask); + +int imr_remove_range(phys_addr_t base, size_t size); + +#endif /* _IMR_H */ diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 000000000..b56c57415 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + */ +#include /* __ignore_sync_check__ */ + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Identifiers for segment registers */ +#define INAT_SEG_REG_IGNORE 0 +#define INAT_SEG_REG_DEFAULT 1 +#define INAT_SEG_REG_CS 2 +#define INAT_SEG_REG_SS 3 +#define INAT_SEG_REG_DS 4 +#define INAT_SEG_REG_ES 5 +#define INAT_SEG_REG_FS 6 +#define INAT_SEG_REG_GS 7 + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; +} +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 000000000..b047efa9d --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h new file mode 100644 index 000000000..5f1d3c421 --- /dev/null +++ b/arch/x86/include/asm/init.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INIT_H +#define _ASM_X86_INIT_H + +struct x86_mapping_info { + void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void *context; /* context for alloc_pgt_page */ + unsigned long page_flag; /* page flag for PMD or PUD entry */ + unsigned long offset; /* ident mapping offset */ + bool direct_gbpages; /* PUD level 1GB page support */ + unsigned long kernpg_flag; /* kernel pagetable flag override */ +}; + +int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long pstart, unsigned long pend); + +#endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h new file mode 100644 index 000000000..f07faa61c --- /dev/null +++ b/arch/x86/include/asm/insn-eval.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_INSN_EVAL_H +#define _ASM_X86_INSN_EVAL_H +/* + * A collection of utility functions for x86 instruction analysis to be + * used in a kernel context. Useful when, for instance, making sense + * of the registers indicated by operands. + */ + +#include +#include +#include +#include + +#define INSN_CODE_SEG_ADDR_SZ(params) ((params >> 4) & 0xf) +#define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) +#define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) + +int pt_regs_offset(struct pt_regs *regs, int regno); + +bool insn_has_rep_prefix(struct insn *insn); +void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); +int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); +int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); +unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs); +unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); +int insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip); +int insn_fetch_from_user(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE], int buf_size); + +enum mmio_type { + MMIO_DECODE_FAILED, + MMIO_WRITE, + MMIO_WRITE_IMM, + MMIO_READ, + MMIO_READ_ZERO_EXTEND, + MMIO_READ_SIGN_EXTEND, + MMIO_MOVS, +}; + +enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes); + +#endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 000000000..1b29f58f7 --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +/* insn_attr_t is defined in inat.h */ +#include /* __ignore_sync_check__ */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} +#endif + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + int emulate_prefix_size; + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.nbytes == 4); +} + +static inline int insn_has_emulate_prefix(struct insn *insn) +{ + return !!insn->emulate_prefix_size; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ + return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 000000000..438ccd4f3 --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generate .byte code for some instructions not supported by old + * binutils. + */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + +#define REG_NUM_INVALID 100 + +#define REG_TYPE_R32 0 +#define REG_TYPE_R64 1 +#define REG_TYPE_INVALID 100 + + .macro R32_NUM opd r32 + \opd = REG_NUM_INVALID + .ifc \r32,%eax + \opd = 0 + .endif + .ifc \r32,%ecx + \opd = 1 + .endif + .ifc \r32,%edx + \opd = 2 + .endif + .ifc \r32,%ebx + \opd = 3 + .endif + .ifc \r32,%esp + \opd = 4 + .endif + .ifc \r32,%ebp + \opd = 5 + .endif + .ifc \r32,%esi + \opd = 6 + .endif + .ifc \r32,%edi + \opd = 7 + .endif +#ifdef CONFIG_X86_64 + .ifc \r32,%r8d + \opd = 8 + .endif + .ifc \r32,%r9d + \opd = 9 + .endif + .ifc \r32,%r10d + \opd = 10 + .endif + .ifc \r32,%r11d + \opd = 11 + .endif + .ifc \r32,%r12d + \opd = 12 + .endif + .ifc \r32,%r13d + \opd = 13 + .endif + .ifc \r32,%r14d + \opd = 14 + .endif + .ifc \r32,%r15d + \opd = 15 + .endif +#endif + .endm + + .macro R64_NUM opd r64 + \opd = REG_NUM_INVALID +#ifdef CONFIG_X86_64 + .ifc \r64,%rax + \opd = 0 + .endif + .ifc \r64,%rcx + \opd = 1 + .endif + .ifc \r64,%rdx + \opd = 2 + .endif + .ifc \r64,%rbx + \opd = 3 + .endif + .ifc \r64,%rsp + \opd = 4 + .endif + .ifc \r64,%rbp + \opd = 5 + .endif + .ifc \r64,%rsi + \opd = 6 + .endif + .ifc \r64,%rdi + \opd = 7 + .endif + .ifc \r64,%r8 + \opd = 8 + .endif + .ifc \r64,%r9 + \opd = 9 + .endif + .ifc \r64,%r10 + \opd = 10 + .endif + .ifc \r64,%r11 + \opd = 11 + .endif + .ifc \r64,%r12 + \opd = 12 + .endif + .ifc \r64,%r13 + \opd = 13 + .endif + .ifc \r64,%r14 + \opd = 14 + .endif + .ifc \r64,%r15 + \opd = 15 + .endif +#endif + .endm + + .macro REG_TYPE type reg + R32_NUM reg_type_r32 \reg + R64_NUM reg_type_r64 \reg + .if reg_type_r64 <> REG_NUM_INVALID + \type = REG_TYPE_R64 + .elseif reg_type_r32 <> REG_NUM_INVALID + \type = REG_TYPE_R32 + .else + \type = REG_TYPE_INVALID + .endif + .endm + + .macro PFX_REX opd1 opd2 W=0 + .if ((\opd1 | \opd2) & 8) || \W + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm +#endif + +#endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h new file mode 100644 index 000000000..5190cc3db --- /dev/null +++ b/arch/x86/include/asm/intel-family.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * While adding a new CPUID for a new microarchitecture, add a new + * group to keep logically sorted out in chronological order. Within + * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. + * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * _N,_P - other mobile parts + * _H - premium mobile parts + * _S - other client parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts + * + * The #define line may optionally include a comment including platform or core + * names. An exception is made for skylake/kabylake where steppings seem to have gotten + * their own names :-( + */ + +/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ +#define INTEL_FAM6_ANY X86_MODEL_ANY + +#define INTEL_FAM6_CORE_YONAH 0x0E + +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E + +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_FAM6_HASWELL_G 0x46 + +#define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_FAM6_BROADWELL_G 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_D 0x56 + +#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */ +#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */ +#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */ +/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */ +/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */ + +#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */ +/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ +/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ +/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ + +#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */ +/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ + +#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */ +#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */ + +#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */ + +#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ + +#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ + +#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ + +#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ + +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ + +#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF + +#define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_FAM6_GRANITERAPIDS_D 0xAE + +#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_FAM6_ALDERLAKE_N 0xBE + +#define INTEL_FAM6_RAPTORLAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_FAM6_RAPTORLAKE_S 0xBF + +#define INTEL_FAM6_METEORLAKE 0xAC +#define INTEL_FAM6_METEORLAKE_L 0xAA + +#define INTEL_FAM6_LUNARLAKE_M 0xBD + +#define INTEL_FAM6_ARROWLAKE_H 0xC5 +#define INTEL_FAM6_ARROWLAKE 0xC6 + +/* "Small Core" Processors (Atom/E-Core) */ + +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ + +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ + +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ + +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ + +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ + +/* Note: the micro-architecture is "Goldmont Plus" */ +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ + +#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ + +#define INTEL_FAM6_SIERRAFOREST_X 0xAF + +#define INTEL_FAM6_GRANDRIDGE 0xB6 + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ + +/* Family 5 */ +#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h new file mode 100644 index 000000000..c201083b3 --- /dev/null +++ b/arch/x86/include/asm/intel-mid.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel MID specific setup code + * + * (C) Copyright 2009, 2021 Intel Corporation + */ +#ifndef _ASM_X86_INTEL_MID_H +#define _ASM_X86_INTEL_MID_H + +#include + +extern int intel_mid_pci_init(void); +extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); + +extern void intel_mid_pwr_power_off(void); + +#define INTEL_MID_PWR_LSS_OFFSET 4 +#define INTEL_MID_PWR_LSS_TYPE (1 << 7) + +extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); + +#ifdef CONFIG_X86_INTEL_MID + +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + +#else /* !CONFIG_X86_INTEL_MID */ + +static inline void intel_scu_devices_create(void) { } +static inline void intel_scu_devices_destroy(void) { } + +#endif /* !CONFIG_X86_INTEL_MID */ + +/* Bus Select SoC Fuse value */ +#define BSEL_SOC_FUSE_MASK 0x7 +/* FSB 133MHz */ +#define BSEL_SOC_FUSE_001 0x1 +/* FSB 100MHz */ +#define BSEL_SOC_FUSE_101 0x5 +/* FSB 83MHz */ +#define BSEL_SOC_FUSE_111 0x7 + +#endif /* _ASM_X86_INTEL_MID_H */ diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h new file mode 100644 index 000000000..2f9eeb5c3 --- /dev/null +++ b/arch/x86/include/asm/intel_ds.h @@ -0,0 +1,38 @@ +#ifndef _ASM_INTEL_DS_H +#define _ASM_INTEL_DS_H + +#include + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS_FMT4 8 +#define MAX_PEBS_EVENTS 32 +#define MAX_FIXED_PEBS_EVENTS 16 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS]; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +struct debug_store_buffers { + char bts_buffer[BTS_BUFFER_SIZE]; + char pebs_buffer[PEBS_BUFFER_SIZE]; +}; + +#endif diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h new file mode 100644 index 000000000..994638ef1 --- /dev/null +++ b/arch/x86/include/asm/intel_pconfig.h @@ -0,0 +1,65 @@ +#ifndef _ASM_X86_INTEL_PCONFIG_H +#define _ASM_X86_INTEL_PCONFIG_H + +#include +#include + +enum pconfig_target { + INVALID_TARGET = 0, + MKTME_TARGET = 1, + PCONFIG_TARGET_NR +}; + +int pconfig_target_supported(enum pconfig_target target); + +enum pconfig_leaf { + MKTME_KEY_PROGRAM = 0, + PCONFIG_LEAF_INVALID, +}; + +#define PCONFIG ".byte 0x0f, 0x01, 0xc5" + +/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ + +/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ +#define MKTME_KEYID_SET_KEY_DIRECT 0 +#define MKTME_KEYID_SET_KEY_RANDOM 1 +#define MKTME_KEYID_CLEAR_KEY 2 +#define MKTME_KEYID_NO_ENCRYPT 3 + +/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ +#define MKTME_AES_XTS_128 (1 << 8) + +/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */ +#define MKTME_PROG_SUCCESS 0 +#define MKTME_INVALID_PROG_CMD 1 +#define MKTME_ENTROPY_ERROR 2 +#define MKTME_INVALID_KEYID 3 +#define MKTME_INVALID_ENC_ALG 4 +#define MKTME_DEVICE_BUSY 5 + +/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */ +struct mktme_key_program { + u16 keyid; + u32 keyid_ctrl; + u8 __rsvd[58]; + u8 key_field_1[64]; + u8 key_field_2[64]; +} __packed __aligned(256); + +static inline int mktme_key_program(struct mktme_key_program *key_program) +{ + unsigned long rax = MKTME_KEY_PROGRAM; + + if (!pconfig_target_supported(MKTME_TARGET)) + return -ENXIO; + + asm volatile(PCONFIG + : "=a" (rax), "=b" (key_program) + : "0" (rax), "1" (key_program) + : "memory", "cc"); + + return rax; +} + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h new file mode 100644 index 000000000..c796e9bc9 --- /dev/null +++ b/arch/x86/include/asm/intel_pt.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_PT_H +#define _ASM_X86_INTEL_PT_H + +#define PT_CPUID_LEAVES 2 +#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */ + +enum pt_capabilities { + PT_CAP_max_subleaf = 0, + PT_CAP_cr3_filtering, + PT_CAP_psb_cyc, + PT_CAP_ip_filtering, + PT_CAP_mtc, + PT_CAP_ptwrite, + PT_CAP_power_event_trace, + PT_CAP_event_trace, + PT_CAP_tnt_disable, + PT_CAP_topa_output, + PT_CAP_topa_multiple_entries, + PT_CAP_single_range_output, + PT_CAP_output_subsys, + PT_CAP_payloads_lip, + PT_CAP_num_address_ranges, + PT_CAP_mtc_periods, + PT_CAP_cycle_thresholds, + PT_CAP_psb_periods, +}; + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +void cpu_emergency_stop_pt(void); +extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); +extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); +extern int is_intel_pt_event(struct perf_event *event); +#else +static inline void cpu_emergency_stop_pt(void) {} +static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } +static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } +static inline int is_intel_pt_event(struct perf_event *event) { return 0; } +#endif + +#endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h new file mode 100644 index 000000000..ce16da719 --- /dev/null +++ b/arch/x86/include/asm/intel_punit_ipc.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_PUNIT_IPC_H_ +#define _ASM_X86_INTEL_PUNIT_IPC_H_ + +/* + * Three types of 8bit P-Unit IPC commands are supported, + * bit[7:6]: [00]: BIOS; [01]: GTD; [10]: ISPD. + */ +typedef enum { + BIOS_IPC = 0, + GTDRIVER_IPC, + ISPDRIVER_IPC, + RESERVED_IPC, +} IPC_TYPE; + +#define IPC_TYPE_OFFSET 6 +#define IPC_PUNIT_BIOS_CMD_BASE (BIOS_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_GTD_CMD_BASE (GTDDRIVER_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_ISPD_CMD_BASE (ISPDRIVER_IPC << IPC_TYPE_OFFSET) +#define IPC_PUNIT_CMD_TYPE_MASK (RESERVED_IPC << IPC_TYPE_OFFSET) + +/* BIOS => Pcode commands */ +#define IPC_PUNIT_BIOS_ZERO (IPC_PUNIT_BIOS_CMD_BASE | 0x00) +#define IPC_PUNIT_BIOS_VR_INTERFACE (IPC_PUNIT_BIOS_CMD_BASE | 0x01) +#define IPC_PUNIT_BIOS_READ_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x02) +#define IPC_PUNIT_BIOS_WRITE_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x03) +#define IPC_PUNIT_BIOS_READ_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x04) +#define IPC_PUNIT_BIOS_WRITE_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x05) +#define IPC_PUNIT_BIOS_READ_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x06) +#define IPC_PUNIT_BIOS_WRITE_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x07) +#define IPC_PUNIT_BIOS_TRIGGER_VDD_RAM (IPC_PUNIT_BIOS_CMD_BASE | 0x08) +#define IPC_PUNIT_BIOS_READ_TELE_INFO (IPC_PUNIT_BIOS_CMD_BASE | 0x09) +#define IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0a) +#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0b) +#define IPC_PUNIT_BIOS_READ_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0c) +#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0d) +#define IPC_PUNIT_BIOS_READ_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0e) +#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0f) +#define IPC_PUNIT_BIOS_READ_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x10) +#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x11) +#define IPC_PUNIT_BIOS_READ_MODULE_TEMP (IPC_PUNIT_BIOS_CMD_BASE | 0x12) +#define IPC_PUNIT_BIOS_RESERVED (IPC_PUNIT_BIOS_CMD_BASE | 0x13) +#define IPC_PUNIT_BIOS_READ_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x14) +#define IPC_PUNIT_BIOS_WRITE_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x15) +#define IPC_PUNIT_BIOS_READ_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x16) +#define IPC_PUNIT_BIOS_WRITE_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x17) +#define IPC_PUNIT_BIOS_READ_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x18) +#define IPC_PUNIT_BIOS_WRITE_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x19) +#define IPC_PUNIT_BIOS_READ_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1a) +#define IPC_PUNIT_BIOS_WRITE_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1b) + +/* GT Driver => Pcode commands */ +#define IPC_PUNIT_GTD_ZERO (IPC_PUNIT_GTD_CMD_BASE | 0x00) +#define IPC_PUNIT_GTD_CONFIG (IPC_PUNIT_GTD_CMD_BASE | 0x01) +#define IPC_PUNIT_GTD_READ_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x02) +#define IPC_PUNIT_GTD_WRITE_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x03) +#define IPC_PUNIT_GTD_GET_WM_VAL (IPC_PUNIT_GTD_CMD_BASE | 0x06) +#define IPC_PUNIT_GTD_WRITE_CONFIG_WISHREQ (IPC_PUNIT_GTD_CMD_BASE | 0x07) +#define IPC_PUNIT_GTD_READ_REQ_DUTY_CYCLE (IPC_PUNIT_GTD_CMD_BASE | 0x16) +#define IPC_PUNIT_GTD_DIS_VOL_FREQ_CHG_REQUEST (IPC_PUNIT_GTD_CMD_BASE | 0x17) +#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_CTRL (IPC_PUNIT_GTD_CMD_BASE | 0x1a) +#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_TUNING (IPC_PUNIT_GTD_CMD_BASE | 0x1c) + +/* ISP Driver => Pcode commands */ +#define IPC_PUNIT_ISPD_ZERO (IPC_PUNIT_ISPD_CMD_BASE | 0x00) +#define IPC_PUNIT_ISPD_CONFIG (IPC_PUNIT_ISPD_CMD_BASE | 0x01) +#define IPC_PUNIT_ISPD_GET_ISP_LTR_VAL (IPC_PUNIT_ISPD_CMD_BASE | 0x02) +#define IPC_PUNIT_ISPD_ACCESS_IU_FREQ_BOUNDS (IPC_PUNIT_ISPD_CMD_BASE | 0x03) +#define IPC_PUNIT_ISPD_READ_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x04) +#define IPC_PUNIT_ISPD_WRITE_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x05) + +/* Error codes */ +#define IPC_PUNIT_ERR_SUCCESS 0 +#define IPC_PUNIT_ERR_INVALID_CMD 1 +#define IPC_PUNIT_ERR_INVALID_PARAMETER 2 +#define IPC_PUNIT_ERR_CMD_TIMEOUT 3 +#define IPC_PUNIT_ERR_CMD_LOCKED 4 +#define IPC_PUNIT_ERR_INVALID_VR_ID 5 +#define IPC_PUNIT_ERR_VR_ERR 6 + +#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC) + +int intel_punit_ipc_simple_command(int cmd, int para1, int para2); +int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out); + +#else + +static inline int intel_punit_ipc_simple_command(int cmd, + int para1, int para2) +{ + return -ENODEV; +} + +static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, + u32 *in, u32 *out) +{ + return -ENODEV; +} + +#endif /* CONFIG_INTEL_PUNIT_IPC */ + +#endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h new file mode 100644 index 000000000..8537f597d --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_SCU_IPC_H_ +#define _ASM_X86_INTEL_SCU_IPC_H_ + +#include + +struct device; +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) +{ + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); +} + +#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h new file mode 100644 index 000000000..8046e70df --- /dev/null +++ b/arch/x86/include/asm/intel_telemetry.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel SOC Telemetry Driver Header File + * Copyright (C) 2015, Intel Corporation. + * All Rights Reserved. + */ +#ifndef INTEL_TELEMETRY_H +#define INTEL_TELEMETRY_H + +#define TELEM_MAX_EVENTS_SRAM 28 +#define TELEM_MAX_OS_ALLOCATED_EVENTS 20 + +#include + +enum telemetry_unit { + TELEM_PSS = 0, + TELEM_IOSS, + TELEM_UNIT_NONE +}; + +struct telemetry_evtlog { + u32 telem_evtid; + u64 telem_evtlog; +}; + +struct telemetry_evtconfig { + /* Array of Event-IDs to Enable */ + u32 *evtmap; + + /* Number of Events (<29) in evtmap */ + u8 num_evts; + + /* Sampling period */ + u8 period; +}; + +struct telemetry_evtmap { + const char *name; + u32 evt_id; +}; + +struct telemetry_unit_config { + struct telemetry_evtmap *telem_evts; + void __iomem *regmap; + u8 ssram_evts_used; + u8 curr_period; + u8 max_period; + u8 min_period; +}; + +struct telemetry_plt_config { + struct telemetry_unit_config pss_config; + struct telemetry_unit_config ioss_config; + struct mutex telem_trace_lock; + struct mutex telem_lock; + struct intel_pmc_dev *pmc; + struct intel_scu_ipc_dev *scu; + bool telem_in_use; +}; + +struct telemetry_core_ops { + int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period, + u8 *ioss_min_period, u8 *ioss_max_period); + + int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig, + struct telemetry_evtconfig *ioss_evtconfig, + int pss_len, int ioss_len); + + int (*update_events)(struct telemetry_evtconfig pss_evtconfig, + struct telemetry_evtconfig ioss_evtconfig); + + int (*set_sampling_period)(u8 pss_period, u8 ioss_period); + + int (*get_trace_verbosity)(enum telemetry_unit telem_unit, + u32 *verbosity); + + int (*set_trace_verbosity)(enum telemetry_unit telem_unit, + u32 verbosity); + + int (*raw_read_eventlog)(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, + int len, int log_all_evts); + + int (*read_eventlog)(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, + int len, int log_all_evts); + + int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts, + u32 *pss_evtmap, u32 *ioss_evtmap); + + int (*reset_events)(void); +}; + +int telemetry_set_pltdata(const struct telemetry_core_ops *ops, + struct telemetry_plt_config *pltconfig); + +int telemetry_clear_pltdata(void); + +struct telemetry_plt_config *telemetry_get_pltdata(void); + +int telemetry_get_evtname(enum telemetry_unit telem_unit, + const char **name, int len); + +int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig, + struct telemetry_evtconfig ioss_evtconfig); + +int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts, + u32 *pss_evtmap, u32 *ioss_evtmap); + +int telemetry_reset_events(void); + +int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config, + struct telemetry_evtconfig *ioss_config, + int pss_len, int ioss_len); + +int telemetry_read_events(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_raw_read_events(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_read_eventlog(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit, + struct telemetry_evtlog *evtlog, int len); + +int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period, + u8 *ioss_min_period, u8 *ioss_max_period); + +int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period); + +int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit, + u32 verbosity); + +int telemetry_get_trace_verbosity(enum telemetry_unit telem_unit, + u32 *verbosity); + +#endif /* INTEL_TELEMETRY_H */ diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 000000000..734482afb --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + */ + asm volatile("invpcid %[desc], %[type]" + :: [desc] "m" (desc), [type] "r" (type) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h new file mode 100644 index 000000000..e9025640f --- /dev/null +++ b/arch/x86/include/asm/io.h @@ -0,0 +1,392 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IO_H +#define _ASM_X86_IO_H + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ + +#define ARCH_HAS_IOREMAP_WC +#define ARCH_HAS_IOREMAP_WT + +#include +#include +#include +#include +#include +#include +#include + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "=q", ) +build_mmio_read(__readw, "w", unsigned short, "=r", ) +build_mmio_read(__readl, "l", unsigned int, "=r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb readb +#define readw readw +#define readl readl +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define writeb writeb +#define writew writew +#define writel writel +#define writeb_relaxed(v, a) __writeb(v, a) +#define writew_relaxed(v, a) __writew(v, a) +#define writel_relaxed(v, a) __writel(v, a) +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#ifdef CONFIG_X86_64 + +build_mmio_read(readq, "q", u64, "=r", :"memory") +build_mmio_read(__readq, "q", u64, "=r", ) +build_mmio_write(writeq, "q", u64, "r", :"memory") +build_mmio_write(__writeq, "q", u64, "r", ) + +#define readq_relaxed(a) __readq(a) +#define writeq_relaxed(v, a) __writeq(v, a) + +#define __raw_readq __readq +#define __raw_writeq __writeq + +/* Let people know that we have them */ +#define readq readq +#define writeq writeq + +#endif + +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return __pa(address); +} +#define virt_to_phys virt_to_phys + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void *phys_to_virt(phys_addr_t address) +{ + return __va(address); +} +#define phys_to_virt phys_to_virt + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + * However, we truncate the address to unsigned int to avoid undesirable + * promotions in legacy drivers. + */ +static inline unsigned int isa_virt_to_bus(volatile void *address) +{ + return (unsigned int)virt_to_phys(address); +} +#define isa_bus_to_virt phys_to_virt + +/* + * The default ioremap() behavior is non-cached; if you need something + * else, you probably want one of the following. + */ +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); +#define ioremap_uc ioremap_uc +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +#define ioremap_cache ioremap_cache +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +#define ioremap_prot ioremap_prot +extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); +#define ioremap_encrypted ioremap_encrypted + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). + */ +void __iomem *ioremap(resource_size_t offset, unsigned long size); +#define ioremap ioremap + +extern void iounmap(volatile void __iomem *addr); +#define iounmap iounmap + +#ifdef __KERNEL__ + +void memcpy_fromio(void *, const volatile void __iomem *, size_t); +void memcpy_toio(volatile void __iomem *, const void *, size_t); +void memset_io(volatile void __iomem *, int, size_t); + +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio +#define memset_io memset_io + +#include + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) + +#endif /* __KERNEL__ */ + +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); + +#if defined(CONFIG_PARAVIRT) +#include +#else + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} + +#endif + +#define BUILDIO(bwl, bw, type) \ +static inline void out##bwl##_p(type value, u16 port) \ +{ \ + out##bwl(value, port); \ + slow_down_io(); \ +} \ + \ +static inline type in##bwl##_p(u16 port) \ +{ \ + type value = in##bwl(port); \ + slow_down_io(); \ + return value; \ +} \ + \ +static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \ +{ \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ + while (count) { \ + out##bwl(*value, port); \ + value++; \ + count--; \ + } \ + } else { \ + asm volatile("rep; outs" #bwl \ + : "+S"(addr), "+c"(count) \ + : "d"(port) : "memory"); \ + } \ +} \ + \ +static inline void ins##bwl(u16 port, void *addr, unsigned long count) \ +{ \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ + while (count) { \ + *value = in##bwl(port); \ + value++; \ + count--; \ + } \ + } else { \ + asm volatile("rep; ins" #bwl \ + : "+D"(addr), "+c"(count) \ + : "d"(port) : "memory"); \ + } \ +} + +BUILDIO(b, b, u8) +BUILDIO(w, w, u16) +BUILDIO(l, , u32) +#undef BUILDIO + +#define inb_p inb_p +#define inw_p inw_p +#define inl_p inl_p +#define insb insb +#define insw insw +#define insl insl + +#define outb_p outb_p +#define outw_p outw_p +#define outl_p outl_p +#define outsb outsb +#define outsw outsw +#define outsl outsl + +extern void *xlate_dev_mem_ptr(phys_addr_t phys); +extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); + +#define xlate_dev_mem_ptr xlate_dev_mem_ptr +#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr + +extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, + enum page_cache_mode pcm); +extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); +#define ioremap_wc ioremap_wc +extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); +#define ioremap_wt ioremap_wt + +extern bool is_early_ioremap_ptep(pte_t *ptep); + +#define IO_SPACE_LIMIT 0xffff + +#include +#undef PCI_IOBASE + +#ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_index(int handle); +#define arch_phys_wc_index arch_phys_wc_index + +extern int __must_check arch_phys_wc_add(unsigned long base, + unsigned long size); +extern void arch_phys_wc_del(int handle); +#define arch_phys_wc_add arch_phys_wc_add +#endif + +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern bool arch_memremap_can_ram_remap(resource_size_t offset, + unsigned long size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + +extern bool phys_mem_access_encrypted(unsigned long phys_addr, + unsigned long size); +#else +static inline bool phys_mem_access_encrypted(unsigned long phys_addr, + unsigned long size) +{ + return true; +} +#endif + +/** + * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units + * @dst: destination, in MMIO space (must be 512-bit aligned) + * @src: source + * @count: number of 512 bits quantities to submit + * + * Submit data from kernel space to MMIO space, in units of 512 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + * + * Warning: Do not use this helper unless your driver has checked that the CPU + * instruction is supported on the platform. + */ +static inline void iosubmit_cmds512(void __iomem *dst, const void *src, + size_t count) +{ + const u8 *from = src; + const u8 *end = from + count * 64; + + while (from < end) { + movdir64b(dst, from); + from += 64; + } +} + +#endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h new file mode 100644 index 000000000..437aa8d00 --- /dev/null +++ b/arch/x86/include/asm/io_apic.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IO_APIC_H +#define _ASM_X86_IO_APIC_H + +#include +#include +#include +#include +#include +/* + * Intel IO-APIC support for SMP and UP systems. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +/* + * The structure of the IO-APIC: + */ +union IO_APIC_reg_00 { + u32 raw; + struct { + u32 __reserved_2 : 14, + LTS : 1, + delivery_type : 1, + __reserved_1 : 8, + ID : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_01 { + u32 raw; + struct { + u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_02 { + u32 raw; + struct { + u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_03 { + u32 raw; + struct { + u32 boot_DT : 1, + __reserved_1 : 31; + } __attribute__ ((packed)) bits; +}; + +struct IO_APIC_route_entry { + union { + struct { + u64 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + delivery_status : 1, + active_low : 1, + irr : 1, + is_level : 1, + masked : 1, + reserved_0 : 15, + reserved_1 : 17, + virt_destid_8_14 : 7, + destid_0_7 : 8; + }; + struct { + u64 ir_shared_0 : 8, + ir_zero : 3, + ir_index_15 : 1, + ir_shared_1 : 5, + ir_reserved_0 : 31, + ir_format : 1, + ir_index_0_14 : 15; + }; + struct { + u64 w1 : 32, + w2 : 32; + }; + }; +} __attribute__ ((packed)); + +struct irq_alloc_info; +struct ioapic_domain_cfg; + +#define IOAPIC_MAP_ALLOC 0x1 +#define IOAPIC_MAP_CHECK 0x2 + +#ifdef CONFIG_X86_IO_APIC + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; + +extern int mpc_ioapic_id(int ioapic); +extern unsigned int mpc_ioapic_addr(int ioapic); + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* 1 if "noapic" boot option passed */ +extern int skip_ioapic_setup; + +/* 1 if "noapic" boot option passed */ +extern int noioapicquirk; + +/* -1 if "noapic" boot option passed */ +extern int noioapicreroute; + +extern u32 gsi_top; + +extern unsigned long io_apic_irqs; + +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs \ + (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + +struct irq_cfg; +extern void ioapic_insert_resources(void); +extern int arch_early_ioapic_init(void); + +extern int save_ioapic_entries(void); +extern void mask_ioapic_entries(void); +extern int restore_ioapic_entries(void); + +extern void setup_ioapic_ids_from_mpc(void); +extern void setup_ioapic_ids_from_mpc_nocheck(void); + +extern int mp_find_ioapic(u32 gsi); +extern int mp_find_ioapic_pin(int ioapic, u32 gsi); +extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info); +extern void mp_unmap_irq(int irq); +extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg); +extern int mp_unregister_ioapic(u32 gsi_base); +extern int mp_ioapic_registered(u32 gsi_base); + +extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, + int node, int trigger, int polarity); + +extern void mp_save_irq(struct mpc_intsrc *m); + +extern void disable_ioapic_support(void); + +extern void __init io_apic_init_mappings(void); +extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); +extern void native_restore_boot_irq_mode(void); + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + return x86_apic_ops.io_apic_read(apic, reg); +} + +extern void setup_IO_APIC(void); +extern void enable_IO_APIC(void); +extern void clear_IO_APIC(void); +extern void restore_boot_irq_mode(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); +extern void print_IO_APICs(void); +#else /* !CONFIG_X86_IO_APIC */ + +#define IO_APIC_IRQ(x) 0 +#define io_apic_assign_pci_irqs 0 +#define setup_ioapic_ids_from_mpc x86_init_noop +static inline void ioapic_insert_resources(void) { } +static inline int arch_early_ioapic_init(void) { return 0; } +static inline void print_IO_APICs(void) {} +#define gsi_top (NR_IRQS_LEGACY) +static inline int mp_find_ioapic(u32 gsi) { return 0; } +static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info) +{ + return gsi; +} + +static inline void mp_unmap_irq(int irq) { } + +static inline int save_ioapic_entries(void) +{ + return -ENOMEM; +} + +static inline void mask_ioapic_entries(void) { } +static inline int restore_ioapic_entries(void) +{ + return -ENOMEM; +} + +static inline void mp_save_irq(struct mpc_intsrc *m) { } +static inline void disable_ioapic_support(void) { } +static inline void io_apic_init_mappings(void) { } +#define native_io_apic_read NULL +#define native_restore_boot_irq_mode NULL + +static inline void setup_IO_APIC(void) { } +static inline void enable_IO_APIC(void) { } +static inline void restore_boot_irq_mode(void) { } + +#endif + +#endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000..7f080f5c7 --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include +#include + +struct io_bitmap { + u64 sequence; + refcount_t refcnt; + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(struct task_struct *tsk); + +static inline void native_tss_invalidate_io_bitmap(void) +{ + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base, + IO_BITMAP_OFFSET_INVALID); +} + +void native_tss_update_io_bitmap(void); + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#define tss_update_io_bitmap native_tss_update_io_bitmap +#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap +#endif + +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(struct task_struct *tsk) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h new file mode 100644 index 000000000..e2de092fc --- /dev/null +++ b/arch/x86/include/asm/iomap.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_IOMAP_H +#define _ASM_X86_IOMAP_H + +/* + * Copyright © 2008 Ingo Molnar + */ + +#include +#include +#include +#include +#include +#include + +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void iomap_free(resource_size_t base, unsigned long size); + +#endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h new file mode 100644 index 000000000..2fd52b65d --- /dev/null +++ b/arch/x86/include/asm/iommu.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOMMU_H +#define _ASM_X86_IOMMU_H + +#include + +#include + +extern int force_iommu, no_iommu; +extern int iommu_detected; +extern int iommu_merge; +extern int panic_on_overflow; + +#ifdef CONFIG_SWIOTLB +extern bool x86_swiotlb_enable; +#else +#define x86_swiotlb_enable false +#endif + +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) + +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ + u64 start = rmrr->base_address; + u64 end = rmrr->end_address + 1; + int entry_type; + + entry_type = e820__get_entry_type(start, end); + if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS) + return 0; + + pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", + start, end - 1); + return -EINVAL; +} + +#endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h new file mode 100644 index 000000000..a1911fea8 --- /dev/null +++ b/arch/x86/include/asm/iosf_mbi.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel OnChip System Fabric MailBox access support + */ + +#ifndef IOSF_MBI_SYMS_H +#define IOSF_MBI_SYMS_H + +#include + +#define MBI_MCR_OFFSET 0xD0 +#define MBI_MDR_OFFSET 0xD4 +#define MBI_MCRX_OFFSET 0xD8 + +#define MBI_RD_MASK 0xFEFFFFFF +#define MBI_WR_MASK 0X01000000 + +#define MBI_MASK_HI 0xFFFFFF00 +#define MBI_MASK_LO 0x000000FF +#define MBI_ENABLE 0xF0 + +/* IOSF SB read/write opcodes */ +#define MBI_MMIO_READ 0x00 +#define MBI_MMIO_WRITE 0x01 +#define MBI_CFG_READ 0x04 +#define MBI_CFG_WRITE 0x05 +#define MBI_CR_READ 0x06 +#define MBI_CR_WRITE 0x07 +#define MBI_REG_READ 0x10 +#define MBI_REG_WRITE 0x11 +#define MBI_ESRAM_READ 0x12 +#define MBI_ESRAM_WRITE 0x13 + +/* Baytrail available units */ +#define BT_MBI_UNIT_AUNIT 0x00 +#define BT_MBI_UNIT_SMC 0x01 +#define BT_MBI_UNIT_CPU 0x02 +#define BT_MBI_UNIT_BUNIT 0x03 +#define BT_MBI_UNIT_PMC 0x04 +#define BT_MBI_UNIT_GFX 0x06 +#define BT_MBI_UNIT_SMI 0x0C +#define BT_MBI_UNIT_CCK 0x14 +#define BT_MBI_UNIT_USB 0x43 +#define BT_MBI_UNIT_SATA 0xA3 +#define BT_MBI_UNIT_PCIE 0xA6 + +/* Quark available units */ +#define QRK_MBI_UNIT_HBA 0x00 +#define QRK_MBI_UNIT_HB 0x03 +#define QRK_MBI_UNIT_RMU 0x04 +#define QRK_MBI_UNIT_MM 0x05 +#define QRK_MBI_UNIT_SOC 0x31 + +/* Action values for the pmic_bus_access_notifier functions */ +#define MBI_PMIC_BUS_ACCESS_BEGIN 1 +#define MBI_PMIC_BUS_ACCESS_END 2 + +#if IS_ENABLED(CONFIG_IOSF_MBI) + +bool iosf_mbi_available(void); + +/** + * iosf_mbi_read() - MailBox Interface read command + * @port: port indicating subunit being accessed + * @opcode: port specific read or write opcode + * @offset: register address offset + * @mdr: register data to be read + * + * Locking is handled by spinlock - cannot sleep. + * Return: Nonzero on error + */ +int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr); + +/** + * iosf_mbi_write() - MailBox unmasked write command + * @port: port indicating subunit being accessed + * @opcode: port specific read or write opcode + * @offset: register address offset + * @mdr: register data to be written + * + * Locking is handled by spinlock - cannot sleep. + * Return: Nonzero on error + */ +int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr); + +/** + * iosf_mbi_modify() - MailBox masked write command + * @port: port indicating subunit being accessed + * @opcode: port specific read or write opcode + * @offset: register address offset + * @mdr: register data being modified + * @mask: mask indicating bits in mdr to be modified + * + * Locking is handled by spinlock - cannot sleep. + * Return: Nonzero on error + */ +int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); + +/** + * iosf_mbi_punit_acquire() - Acquire access to the P-Unit + * + * One some systems the P-Unit accesses the PMIC to change various voltages + * through the same bus as other kernel drivers use for e.g. battery monitoring. + * + * If a driver sends requests to the P-Unit which require the P-Unit to access + * the PMIC bus while another driver is also accessing the PMIC bus various bad + * things happen. + * + * Call this function before sending requests to the P-Unit which may make it + * access the PMIC, be it through iosf_mbi* functions or through other means. + * This function will block all kernel access to the PMIC I2C bus, so that the + * P-Unit can safely access the PMIC over the shared I2C bus. + * + * Note on these systems the i2c-bus driver will request a sempahore from the + * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing + * it, but this does not appear to be sufficient, we still need to avoid making + * certain P-Unit requests during the access window to avoid problems. + * + * This function locks a mutex, as such it may sleep. + */ +void iosf_mbi_punit_acquire(void); + +/** + * iosf_mbi_punit_release() - Release access to the P-Unit + */ +void iosf_mbi_punit_release(void); + +/** + * iosf_mbi_block_punit_i2c_access() - Block P-Unit accesses to the PMIC bus + * + * Call this function to block P-Unit access to the PMIC I2C bus, so that the + * kernel can safely access the PMIC over the shared I2C bus. + * + * This function acquires the P-Unit bus semaphore and notifies + * pmic_bus_access_notifier listeners that they may no longer access the + * P-Unit in a way which may cause it to access the shared I2C bus. + * + * Note this function may be called multiple times and the bus will not + * be released until iosf_mbi_unblock_punit_i2c_access() has been called the + * same amount of times. + * + * Return: Nonzero on error + */ +int iosf_mbi_block_punit_i2c_access(void); + +/* + * iosf_mbi_unblock_punit_i2c_access() - Release PMIC I2C bus block + * + * Release i2c access block gotten through iosf_mbi_block_punit_i2c_access(). + */ +void iosf_mbi_unblock_punit_i2c_access(void); + +/** + * iosf_mbi_register_pmic_bus_access_notifier - Register PMIC bus notifier + * + * This function can be used by drivers which may need to acquire P-Unit + * managed resources from interrupt context, where iosf_mbi_punit_acquire() + * can not be used. + * + * This function allows a driver to register a notifier to get notified (in a + * process context) before other drivers start accessing the PMIC bus. + * + * This allows the driver to acquire any resources, which it may need during + * the window the other driver is accessing the PMIC, before hand. + * + * @nb: notifier_block to register + */ +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); + +/** + * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier + * + * @nb: notifier_block to unregister + */ +int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); + +/** + * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus + * notifier, unlocked + * + * Like iosf_mbi_unregister_pmic_bus_access_notifier(), but for use when the + * caller has already called iosf_mbi_punit_acquire() itself. + * + * @nb: notifier_block to unregister + */ +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb); + +/** + * iosf_mbi_assert_punit_acquired - Assert that the P-Unit has been acquired. + */ +void iosf_mbi_assert_punit_acquired(void); + +#else /* CONFIG_IOSF_MBI is not enabled */ +static inline +bool iosf_mbi_available(void) +{ + return false; +} + +static inline +int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) +{ + WARN(1, "IOSF_MBI driver not available"); + return -EPERM; +} + +static inline +int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) +{ + WARN(1, "IOSF_MBI driver not available"); + return -EPERM; +} + +static inline +int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) +{ + WARN(1, "IOSF_MBI driver not available"); + return -EPERM; +} + +static inline void iosf_mbi_punit_acquire(void) {} +static inline void iosf_mbi_punit_release(void) {} + +static inline +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline +int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int +iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(struct notifier_block *nb) +{ + return 0; +} + +static inline +int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) +{ + return 0; +} + +static inline void iosf_mbi_assert_punit_acquired(void) {} + +#endif /* CONFIG_IOSF_MBI */ + +#endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h new file mode 100644 index 000000000..768aa234c --- /dev/null +++ b/arch/x86/include/asm/irq.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_H +#define _ASM_X86_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * + */ + +#include +#include + +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +extern int irq_init_percpu_irqstack(unsigned int cpu); + +struct irq_desc; + +extern void fixup_irqs(void); + +#ifdef CONFIG_HAVE_KVM +extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +#endif + +extern void (*x86_platform_ipi_callback)(void); +extern void native_init_IRQ(void); + +extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); + +extern void init_ISA_irqs(void); + +extern void __init init_IRQ(void); + +#ifdef CONFIG_X86_LOCAL_APIC +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, + bool exclude_self); + +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +#endif + +#endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h new file mode 100644 index 000000000..7cc494321 --- /dev/null +++ b/arch/x86/include/asm/irq_remapping.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This header file contains the interface of the interrupt remapping code to + * the x86 interrupt management code. + */ + +#ifndef __X86_IRQ_REMAPPING_H +#define __X86_IRQ_REMAPPING_H + +#include +#include +#include + +struct msi_msg; +struct irq_alloc_info; + +enum irq_remap_cap { + IRQ_POSTING_CAP = 0, +}; + +enum { + IRQ_REMAP_XAPIC_MODE, + IRQ_REMAP_X2APIC_MODE, +}; + +struct vcpu_data { + u64 pi_desc_addr; /* Physical address of PI Descriptor */ + u32 vector; /* Guest vector of the interrupt */ +}; + +#ifdef CONFIG_IRQ_REMAP + +extern raw_spinlock_t irq_2_ir_lock; + +extern bool irq_remapping_cap(enum irq_remap_cap cap); +extern void set_irq_remapping_broken(void); +extern int irq_remapping_prepare(void); +extern int irq_remapping_enable(void); +extern void irq_remapping_disable(void); +extern int irq_remapping_reenable(int); +extern int irq_remap_enable_fault_handling(void); +extern void panic_if_irq_remap(const char *msg); + +/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ +extern struct irq_domain * +arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); + +/* Get parent irqdomain for interrupt remapping irqdomain */ +static inline struct irq_domain *arch_get_ir_parent_domain(void) +{ + return x86_vector_domain; +} + +#else /* CONFIG_IRQ_REMAP */ + +static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } +static inline void set_irq_remapping_broken(void) { } +static inline int irq_remapping_prepare(void) { return -ENODEV; } +static inline int irq_remapping_enable(void) { return -ENODEV; } +static inline void irq_remapping_disable(void) { } +static inline int irq_remapping_reenable(int eim) { return -ENODEV; } +static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } + +static inline void panic_if_irq_remap(const char *msg) +{ +} + +#endif /* CONFIG_IRQ_REMAP */ +#endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h new file mode 100644 index 000000000..147cb8fdd --- /dev/null +++ b/arch/x86/include/asm/irq_stack.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_STACK_H +#define _ASM_X86_IRQ_STACK_H + +#include +#include + +#include + +#ifdef CONFIG_X86_64 + +/* + * Macro to inline switching to an interrupt stack and invoking function + * calls from there. The following rules apply: + * + * - Ordering: + * + * 1. Write the stack pointer into the top most place of the irq + * stack. This ensures that the various unwinders can link back to the + * original stack. + * + * 2. Switch the stack pointer to the top of the irq stack. + * + * 3. Invoke whatever needs to be done (@asm_call argument) + * + * 4. Pop the original stack pointer from the top of the irq stack + * which brings it back to the original stack where it left off. + * + * - Function invocation: + * + * To allow flexible usage of the macro, the actual function code including + * the store of the arguments in the call ABI registers is handed in via + * the @asm_call argument. + * + * - Local variables: + * + * @tos: + * The @tos variable holds a pointer to the top of the irq stack and + * _must_ be allocated in a non-callee saved register as this is a + * restriction coming from objtool. + * + * Note, that (tos) is both in input and output constraints to ensure + * that the compiler does not assume that R11 is left untouched in + * case this macro is used in some place where the per cpu interrupt + * stack pointer is used again afterwards + * + * - Function arguments: + * The function argument(s), if any, have to be defined in register + * variables at the place where this is invoked. Storing the + * argument(s) in the proper register(s) is part of the @asm_call + * + * - Constraints: + * + * The constraints have to be done very carefully because the compiler + * does not know about the assembly call. + * + * output: + * As documented already above the @tos variable is required to be in + * the output constraints to make the compiler aware that R11 cannot be + * reused after the asm() statement. + * + * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is + * required as well as this prevents certain creative GCC variants from + * misplacing the ASM code. + * + * input: + * - func: + * Immediate, which tells the compiler that the function is referenced. + * + * - tos: + * Register. The actual register is defined by the variable declaration. + * + * - function arguments: + * The constraints are handed in via the 'argconstr' argument list. They + * describe the register arguments which are used in @asm_call. + * + * clobbers: + * Function calls can clobber anything except the callee-saved + * registers. Tell the compiler. + */ +#define call_on_stack(stack, func, asm_call, argconstr...) \ +{ \ + register void *tos asm("r11"); \ + \ + tos = ((void *)(stack)); \ + \ + asm_inline volatile( \ + "movq %%rsp, (%[tos]) \n" \ + "movq %[tos], %%rsp \n" \ + \ + asm_call \ + \ + "popq %%rsp \n" \ + \ + : "+r" (tos), ASM_CALL_CONSTRAINT \ + : [__func] "i" (func), [tos] "r" (tos) argconstr \ + : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", \ + "memory" \ + ); \ +} + +#define ASM_CALL_ARG0 \ + "call %P[__func] \n" \ + ASM_REACHABLE + +#define ASM_CALL_ARG1 \ + "movq %[arg1], %%rdi \n" \ + ASM_CALL_ARG0 + +#define ASM_CALL_ARG2 \ + "movq %[arg2], %%rsi \n" \ + ASM_CALL_ARG1 + +#define ASM_CALL_ARG3 \ + "movq %[arg3], %%rdx \n" \ + ASM_CALL_ARG2 + +#define call_on_irqstack(func, asm_call, argconstr...) \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + func, asm_call, argconstr) + +/* Macros to assert type correctness for run_*_on_irqstack macros */ +#define assert_function_type(func, proto) \ + static_assert(__builtin_types_compatible_p(typeof(&func), proto)) + +#define assert_arg_type(arg, proto) \ + static_assert(__builtin_types_compatible_p(typeof(arg), proto)) + +/* + * Macro to invoke system vector and device interrupt C handlers. + */ +#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \ +{ \ + /* \ + * User mode entry and interrupt on the irq stack do not \ + * switch stacks. If from user mode the task stack is empty. \ + */ \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + irq_enter_rcu(); \ + func(c_args); \ + irq_exit_rcu(); \ + } else { \ + /* \ + * Mark the irq stack inuse _before_ and unmark _after_ \ + * switching stacks. Interrupts are disabled in both \ + * places. Invoke the stack switch macro with the call \ + * sequence which matches the above direct invocation. \ + */ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(func, asm_call, constr); \ + __this_cpu_write(hardirq_stack_inuse, false); \ + } \ +} + +/* + * Function call sequence for __call_on_irqstack() for system vectors. + * + * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input + * mechanism because these functions are global and cannot be optimized out + * when compiling a particular source file which uses one of these macros. + * + * The argument (regs) does not need to be pushed or stashed in a callee + * saved register to be safe vs. the irq_enter_rcu() call because the + * clobbers already prevent the compiler from storing it in a callee + * clobbered register. As the compiler has to preserve @regs for the final + * call to idtentry_exit() anyway, it's likely that it does not cause extra + * effort for this asm magic. + */ +#define ASM_CALL_SYSVEC \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG1 \ + "call irq_exit_rcu \n" + +#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs) + +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *)); \ + assert_arg_type(regs, struct pt_regs *); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC, \ + SYSVEC_CONSTRAINTS, regs); \ +} + +/* + * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store + * @regs and @vector in callee saved registers. + */ +#define ASM_CALL_IRQ \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG2 \ + "call irq_exit_rcu \n" + +#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector) + +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *, u32)); \ + assert_arg_type(regs, struct pt_regs *); \ + assert_arg_type(vector, u32); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_IRQ, \ + IRQ_CONSTRAINTS, regs, vector); \ +} + +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +/* + * Macro to invoke __do_softirq on the irq stack. This is only called from + * task context when bottom halves are about to be reenabled and soft + * interrupts are pending to be processed. The interrupt stack cannot be in + * use here. + */ +#define do_softirq_own_stack() \ +{ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ + __this_cpu_write(hardirq_stack_inuse, false); \ +} + +#endif + +#else /* CONFIG_X86_64 */ +/* System vector handlers always run on the stack they interrupted. */ +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + irq_enter_rcu(); \ + func(regs); \ + irq_exit_rcu(); \ +} + +/* Switches to the irq stack within func() */ +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + irq_enter_rcu(); \ + func(regs, vector); \ + irq_exit_rcu(); \ +} + +#endif /* !CONFIG_X86_64 */ + +#endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h new file mode 100644 index 000000000..43dcb9284 --- /dev/null +++ b/arch/x86/include/asm/irq_vectors.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_VECTORS_H +#define _ASM_X86_IRQ_VECTORS_H + +#include +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... LOCAL_TIMER_VECTOR-1 + * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ + +/* This is used as an interrupt vector when programming the APIC. */ +#define NMI_VECTOR 0x02 + +/* + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +/* + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +#define IA32_SYSCALL_VECTOR 0x80 + +/* + * Vectors 0x30-0x3f are used for ISA interrupts. + * round up to the next 16-vector boundary + */ +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + */ + +#define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define REBOOT_VECTOR 0xf8 + +/* + * Generic system vector for platform specific use + */ +#define X86_PLATFORM_IPI_VECTOR 0xf7 + +/* + * IRQ work vector: + */ +#define IRQ_WORK_VECTOR 0xf6 + +/* 0xf5 - unused, was UV_BAU_MESSAGE */ +#define DEFERRED_ERROR_VECTOR 0xf4 + +/* Vector on which hypervisor callbacks will be delivered */ +#define HYPERVISOR_CALLBACK_VECTOR 0xf3 + +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 +#define POSTED_INTR_NESTED_VECTOR 0xf0 +#endif + +#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef + +#if IS_ENABLED(CONFIG_HYPERV) +#define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#define HYPERV_STIMER0_VECTOR 0xed +#endif + +#define LOCAL_TIMER_VECTOR 0xec + +#define NR_VECTORS 256 + +#ifdef CONFIG_X86_LOCAL_APIC +#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#else +#define FIRST_SYSTEM_VECTOR NR_VECTORS +#endif + +#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) +#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR) + +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ + +#define NR_IRQS_LEGACY 16 + +#define CPU_VECTOR_LIMIT (64 * NR_CPUS) +#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS) + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI) +#define NR_IRQS \ + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +#elif defined(CONFIG_X86_IO_APIC) +#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +#elif defined(CONFIG_PCI_MSI) +#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT) +#else +#define NR_IRQS NR_IRQS_LEGACY +#endif + +#endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h new file mode 100644 index 000000000..800ffce0d --- /dev/null +++ b/arch/x86/include/asm/irq_work.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IRQ_WORK_H +#define _ASM_IRQ_WORK_H + +#include + +#ifdef CONFIG_X86_LOCAL_APIC +static inline bool arch_irq_work_has_interrupt(void) +{ + return boot_cpu_has(X86_FEATURE_APIC); +} +extern void arch_irq_work_raise(void); +#else +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} +#endif + +#endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h new file mode 100644 index 000000000..125c23b7b --- /dev/null +++ b/arch/x86/include/asm/irqdomain.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IRQDOMAIN_H +#define _ASM_IRQDOMAIN_H + +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, + X86_IRQ_ALLOC_LEGACY = 0x2, +}; + +extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec); +extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec); + +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +struct device_node; +struct irq_data; + +enum ioapic_domain_type { + IOAPIC_DOMAIN_INVALID, + IOAPIC_DOMAIN_LEGACY, + IOAPIC_DOMAIN_STRICT, + IOAPIC_DOMAIN_DYNAMIC, +}; + +struct ioapic_domain_cfg { + enum ioapic_domain_type type; + const struct irq_domain_ops *ops; + struct device_node *dev; +}; + +extern const struct irq_domain_ops mp_ioapic_irqdomain_ops; + +extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg); +extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +extern int mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve); +extern void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data); +extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); +#endif /* CONFIG_X86_IO_APIC */ + +#ifdef CONFIG_PCI_MSI +void x86_create_pci_msi_domain(void); +struct irq_domain *native_create_pci_msi_domain(void); +extern struct irq_domain *x86_pci_msi_default_domain; +#else +static inline void x86_create_pci_msi_domain(void) { } +#define native_create_pci_msi_domain NULL +#define x86_pci_msi_default_domain NULL +#endif + +#endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h new file mode 100644 index 000000000..7793e52d6 --- /dev/null +++ b/arch/x86/include/asm/irqflags.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_IRQFLAGS_H_ +#define _X86_IRQFLAGS_H_ + +#include + +#ifndef __ASSEMBLY__ + +#include + +/* Provide __cpuidle; we can't safely include */ +#define __cpuidle __section(".cpuidle.text") + +/* + * Interrupt control: + */ + +/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ +extern inline unsigned long native_save_fl(void); +extern __always_inline unsigned long native_save_fl(void) +{ + unsigned long flags; + + /* + * "=rm" is safe here, because "pop" adjusts the stack before + * it evaluates its effective address -- this is part of the + * documented behavior of the "pop" instruction. + */ + asm volatile("# __raw_save_flags\n\t" + "pushf ; pop %0" + : "=rm" (flags) + : /* no input */ + : "memory"); + + return flags; +} + +static __always_inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static __always_inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline __cpuidle void native_safe_halt(void) +{ + mds_idle_clear_cpu_buffers(); + asm volatile("sti; hlt": : :"memory"); +} + +static inline __cpuidle void native_halt(void) +{ + mds_idle_clear_cpu_buffers(); + asm volatile("hlt": : :"memory"); +} + +#endif + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#ifndef __ASSEMBLY__ +#include + +static __always_inline unsigned long arch_local_save_flags(void) +{ + return native_save_fl(); +} + +static __always_inline void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static __always_inline void arch_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline __cpuidle void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline __cpuidle void halt(void) +{ + native_halt(); +} + +/* + * For spinlocks, etc: + */ +static __always_inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = arch_local_save_flags(); + arch_local_irq_disable(); + return flags; +} +#else + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS pushfq; popq %rax +#endif + +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT_XXL */ + +#ifndef __ASSEMBLY__ +static __always_inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static __always_inline int arch_irqs_disabled(void) +{ + unsigned long flags = arch_local_save_flags(); + + return arch_irqs_disabled_flags(flags); +} + +static __always_inline void arch_local_irq_restore(unsigned long flags) +{ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_enable(); +} +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h new file mode 100644 index 000000000..7ede2731d --- /dev/null +++ b/arch/x86/include/asm/ist.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Include file for the interface to IST BIOS + * Copyright 2002 Andy Grover + */ +#ifndef _ASM_X86_IST_H +#define _ASM_X86_IST_H + +#include + + +extern struct ist_info ist_info; + +#endif /* _ASM_X86_IST_H */ diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h new file mode 100644 index 000000000..a34897aef --- /dev/null +++ b/arch/x86/include/asm/jailhouse_para.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Jailhouse paravirt detection + * + * Copyright (c) Siemens AG, 2015-2017 + * + * Authors: + * Jan Kiszka + */ + +#ifndef _ASM_X86_JAILHOUSE_PARA_H +#define _ASM_X86_JAILHOUSE_PARA_H + +#include + +#ifdef CONFIG_JAILHOUSE_GUEST +bool jailhouse_paravirt(void); +#else +static inline bool jailhouse_paravirt(void) +{ + return false; +} +#endif + +#endif /* _ASM_X86_JAILHOUSE_PARA_H */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 000000000..071572e23 --- /dev/null +++ b/arch/x86/include/asm/jump_label.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_JUMP_LABEL_H +#define _ASM_X86_JUMP_LABEL_H + +#define HAVE_JUMP_LABEL_BATCH + +#include +#include + +#ifndef __ASSEMBLY__ + +#include +#include + +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ".long 1b - . \n\t" \ + ".long %l[l_yes] - . \n\t" \ + _ASM_PTR "%c0 + %c1 - .\n\t" \ + ".popsection \n\t" + +#ifdef CONFIG_HAVE_JUMP_LABEL_HACK + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes] # objtool NOPs this \n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (2 | branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */ + +static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) +{ + asm_volatile_goto("1:" + ".byte " __stringify(BYTES_NOP5) "\n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */ + +static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (branch) : : l_yes); + + return false; +l_yes: + return true; +} + +extern int arch_jump_entry_size(struct jump_entry *entry); + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h new file mode 100644 index 000000000..13e70da38 --- /dev/null +++ b/arch/x86/include/asm/kasan.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KASAN_H +#define _ASM_X86_KASAN_H + +#include +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_SCALE_SHIFT 3 + +/* + * Compiler uses shadow offset assuming that addresses start + * from 0. Kernel addresses don't start from 0, so shadow + * for kernel really starts from compiler's shadow offset + + * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT + */ +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + ((-1UL << __VIRTUAL_MASK_SHIFT) >> \ + KASAN_SHADOW_SCALE_SHIFT)) +/* + * 47 bits for kernel address -> (47 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow + * 56 bits for kernel address -> (56 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow + */ +#define KASAN_SHADOW_END (KASAN_SHADOW_START + \ + (1ULL << (__VIRTUAL_MASK_SHIFT - \ + KASAN_SHADOW_SCALE_SHIFT))) + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN +void __init kasan_early_init(void); +void __init kasan_init(void); +#else +static inline void kasan_early_init(void) { } +static inline void kasan_init(void) { } +#endif + +#endif + +#endif diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h new file mode 100644 index 000000000..064819046 --- /dev/null +++ b/arch/x86/include/asm/kaslr.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_KASLR_H_ +#define _ASM_KASLR_H_ + +unsigned long kaslr_get_random_long(const char *purpose); + +#ifdef CONFIG_RANDOMIZE_MEMORY +void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); +#else +static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} +#endif /* CONFIG_RANDOMIZE_MEMORY */ + +#endif diff --git a/arch/x86/include/asm/kbdleds.h b/arch/x86/include/asm/kbdleds.h new file mode 100644 index 000000000..197ea4fed --- /dev/null +++ b/arch/x86/include/asm/kbdleds.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KBDLEDS_H +#define _ASM_X86_KBDLEDS_H + +/* + * Some laptops take the 789uiojklm,. keys as number pad when NumLock is on. + * This seems a good reason to start with NumLock off. That's why on X86 we + * ask the bios for the correct state. + */ + +#include + +static inline int kbd_defleds(void) +{ + return boot_params.kbd_status & 0x20 ? (1 << VC_NUMLOCK) : 0; +} + +#endif /* _ASM_X86_KBDLEDS_H */ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h new file mode 100644 index 000000000..d1514e704 --- /dev/null +++ b/arch/x86/include/asm/kdebug.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KDEBUG_H +#define _ASM_X86_KDEBUG_H + +#include + +struct pt_regs; + +/* Grossly misnamed. */ +enum die_val { + DIE_OOPS = 1, + DIE_INT3, + DIE_DEBUG, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_PAGE_FAULT, + DIE_NMIUNKNOWN, +}; + +enum show_regs_mode { + SHOW_REGS_SHORT, + /* + * For when userspace crashed, but we don't think it's our fault, and + * therefore don't print kernel registers. + */ + SHOW_REGS_USER, + SHOW_REGS_ALL +}; + +extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); +extern int __must_check __die(const char *, struct pt_regs *, long); +extern void show_stack_regs(struct pt_regs *regs); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode, + const char *log_lvl); +extern void show_iret_regs(struct pt_regs *regs, const char *log_lvl); +extern unsigned long oops_begin(void); +extern void oops_end(unsigned long, struct pt_regs *, int signr); + +#endif /* _ASM_X86_KDEBUG_H */ diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h new file mode 100644 index 000000000..df89ee7d3 --- /dev/null +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_KEXEC_BZIMAGE64_H +#define _ASM_KEXEC_BZIMAGE64_H + +extern const struct kexec_file_ops kexec_bzImage64_ops; + +#endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h new file mode 100644 index 000000000..256eee99a --- /dev/null +++ b/arch/x86/include/asm/kexec.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KEXEC_H +#define _ASM_X86_KEXEC_H + +#ifdef CONFIG_X86_32 +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define PA_SWAP_PAGE 3 +# define PAGES_NR 4 +#else +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_TABLE_PAGE 2 +# define PA_SWAP_PAGE 3 +# define PAGES_NR 4 +#endif + +# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 + +#ifndef __ASSEMBLY__ + +#include +#include + +#include +#include +#include + +struct kimage; + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * So far x86_64 is limited to 40 physical address bits. + */ +#ifdef CONFIG_X86_32 +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +# define KEXEC_CONTROL_PAGE_SIZE 4096 + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_386 + +/* We can also handle crash dumps from 64 bit kernel. */ +# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) +#else +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1) +/* Maximum address we can use for the control pages */ +# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) + +/* Allocate one page for the pdp and the second for the code */ +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_X86_64 +#endif + +/* + * This function is responsible for capturing register states if coming + * via panic otherwise just fix up the ss and sp if coming via kernel + * mode exception. + */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + } else { +#ifdef CONFIG_X86_32 + asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); + asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); + asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); + asm volatile("movl %%esi,%0" : "=m"(newregs->si)); + asm volatile("movl %%edi,%0" : "=m"(newregs->di)); + asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); + asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); + asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); + asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); + asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); +#else + asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); + asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); + asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); + asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); + asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); + asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); + asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); + asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); + asm volatile("movq %%r8,%0" : "=m"(newregs->r8)); + asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); + asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); + asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); + asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); + asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); + asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); + asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); +#endif + newregs->ip = _THIS_IP_; + } +} + +#ifdef CONFIG_X86_32 +asmlinkage unsigned long +relocate_kernel(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); +#else +unsigned long +relocate_kernel(unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address, + unsigned int preserve_context, + unsigned int host_mem_enc_active); +#endif + +#define ARCH_HAS_KIMAGE_ARCH + +#ifdef CONFIG_X86_32 +struct kimage_arch { + pgd_t *pgd; +#ifdef CONFIG_X86_PAE + pmd_t *pmd0; + pmd_t *pmd1; +#endif + pte_t *pte0; + pte_t *pte1; +}; +#else +struct kimage_arch { + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +}; +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_X86_64 +/* + * Number of elements and order of elements in this structure should match + * with the ones in arch/x86/purgatory/entry64.S. If you make a change here + * make an appropriate change in purgatory too. + */ +struct kexec_entry64_regs { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; + +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, + gfp_t gfp); +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages + +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages + +void arch_kexec_protect_crashkres(void); +#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres + +void arch_kexec_unprotect_crashkres(void); +#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres + +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +void *arch_kexec_kernel_image_load(struct kimage *image); +#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup +#endif +#endif + +extern void kdump_nmi_shootdown_cpus(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h new file mode 100644 index 000000000..ff5c7134a --- /dev/null +++ b/arch/x86/include/asm/kfence.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KFENCE support. + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef _ASM_X86_KFENCE_H +#define _ASM_X86_KFENCE_H + +#ifndef MODULE + +#include +#include + +#include +#include +#include +#include + +/* Force 4K pages for __kfence_pool. */ +static inline bool arch_kfence_init_pool(void) +{ + unsigned long addr; + + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); + addr += PAGE_SIZE) { + unsigned int level; + + if (!lookup_address(addr, &level)) + return false; + + if (level != PG_LEVEL_4K) + set_memory_4k(addr, 1); + } + + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + unsigned int level; + pte_t *pte = lookup_address(addr, &level); + + if (WARN_ON(!pte || level != PG_LEVEL_4K)) + return false; + + /* + * We need to avoid IPIs, as we may get KFENCE allocations or faults + * with interrupts disabled. Therefore, the below is best-effort, and + * does not flush TLBs on all CPUs. We can tolerate some inaccuracy; + * lazy fault handling takes care of faults after the page is PRESENT. + */ + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + else + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); + flush_tlb_one_kernel(addr); + preempt_enable(); + return true; +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KFENCE_H */ diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h new file mode 100644 index 000000000..aacaf2502 --- /dev/null +++ b/arch/x86/include/asm/kgdb.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KGDB_H +#define _ASM_X86_KGDB_H + +/* + * Copyright (C) 2001-2004 Amit S. Kale + * Copyright (C) 2008 Wind River Systems, Inc. + */ + +#include + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets + * Longer buffer is needed to list all threads + */ +#define BUFMAX 1024 + +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +#ifdef CONFIG_X86_32 +enum regnames { + GDB_AX, /* 0 */ + GDB_CX, /* 1 */ + GDB_DX, /* 2 */ + GDB_BX, /* 3 */ + GDB_SP, /* 4 */ + GDB_BP, /* 5 */ + GDB_SI, /* 6 */ + GDB_DI, /* 7 */ + GDB_PC, /* 8 also known as eip */ + GDB_PS, /* 9 also known as eflags */ + GDB_CS, /* 10 */ + GDB_SS, /* 11 */ + GDB_DS, /* 12 */ + GDB_ES, /* 13 */ + GDB_FS, /* 14 */ + GDB_GS, /* 15 */ +}; +#define GDB_ORIG_AX 41 +#define DBG_MAX_REG_NUM 16 +#define NUMREGBYTES ((GDB_GS+1)*4) +#else /* ! CONFIG_X86_32 */ +enum regnames { + GDB_AX, /* 0 */ + GDB_BX, /* 1 */ + GDB_CX, /* 2 */ + GDB_DX, /* 3 */ + GDB_SI, /* 4 */ + GDB_DI, /* 5 */ + GDB_BP, /* 6 */ + GDB_SP, /* 7 */ + GDB_R8, /* 8 */ + GDB_R9, /* 9 */ + GDB_R10, /* 10 */ + GDB_R11, /* 11 */ + GDB_R12, /* 12 */ + GDB_R13, /* 13 */ + GDB_R14, /* 14 */ + GDB_R15, /* 15 */ + GDB_PC, /* 16 */ + GDB_PS, /* 17 */ + GDB_CS, /* 18 */ + GDB_SS, /* 19 */ + GDB_DS, /* 20 */ + GDB_ES, /* 21 */ + GDB_FS, /* 22 */ + GDB_GS, /* 23 */ +}; +#define GDB_ORIG_AX 57 +#define DBG_MAX_REG_NUM 24 +/* 17 64 bit regs and 5 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (5 * 4)) +#endif /* ! CONFIG_X86_32 */ + +static inline void arch_kgdb_breakpoint(void) +{ + asm(" int $3"); +} +#define BREAK_INSTR_SIZE 1 +#define CACHE_FLUSH_IS_SAFE 1 +#define GDB_ADJUSTS_BREAK_OFFSET + +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); + +#endif /* _ASM_X86_KGDB_H */ diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h new file mode 100644 index 000000000..8fa6ac0e2 --- /dev/null +++ b/arch/x86/include/asm/kmsan.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KMSAN support. + * + * Copyright (C) 2022, Google LLC + * Author: Alexander Potapenko + */ + +#ifndef _ASM_X86_KMSAN_H +#define _ASM_X86_KMSAN_H + +#ifndef MODULE + +#include +#include +#include + +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow); +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin); + +/* + * Functions below are declared in the header to make sure they are inlined. + * They all are called from kmsan_get_metadata() for every memory access in + * the kernel, so speed is important here. + */ + +/* + * Compute metadata addresses for the CPU entry area on x86. + */ +static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) +{ + unsigned long addr64 = (unsigned long)addr; + char *metadata_array; + unsigned long off; + int cpu; + + if ((addr64 < CPU_ENTRY_AREA_BASE) || + (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE))) + return NULL; + cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE; + off = addr64 - (unsigned long)get_cpu_entry_area(cpu); + if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE)) + return NULL; + metadata_array = is_origin ? cpu_entry_area_origin : + cpu_entry_area_shadow; + return &per_cpu(metadata_array[off], cpu); +} + +/* + * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version. + */ +static inline bool kmsan_phys_addr_valid(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + return !(addr >> boot_cpu_data.x86_phys_bits); + else + return true; +} + +/* + * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version. + */ +static inline bool kmsan_virt_addr_valid(void *addr) +{ + unsigned long x = (unsigned long)addr; + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) + return false; + } else { + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !kmsan_phys_addr_valid(x)) + return false; + } + + return pfn_valid(x >> PAGE_SHIFT); +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KMSAN_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h new file mode 100644 index 000000000..a2e9317aa --- /dev/null +++ b/arch/x86/include/asm/kprobes.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_KPROBES_H +#define _ASM_X86_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * See arch/x86/kernel/kprobes.c for x86 kprobes history. + */ + +#include + +#ifdef CONFIG_KPROBES +#include +#include +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct pt_regs; +struct kprobe; + +typedef u8 kprobe_opcode_t; + +#define MAX_STACK_SIZE 64 +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) + +#define flush_insn_slot(p) do { } while (0) + +/* optinsn template addresses */ +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_clac[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) +#define MAX_OPTINSN_SIZE \ + (((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) + \ + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) + +extern const int kretprobe_blacklist_size; + +void arch_remove_kprobe(struct kprobe *p); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* + * boostable = 0: This instruction type is not boostable. + * boostable = 1: This instruction has been boosted: we have + * added a relative jump after the instruction copy in insn, + * so no single-step and fixup are needed (unless there's + * a post_handler). + */ + unsigned boostable:1; + unsigned char size; /* The size of insn */ + union { + unsigned char opcode; + struct { + unsigned char type; + } jcc; + struct { + unsigned char type; + unsigned char asize; + } loop; + struct { + unsigned char reg; + } indirect; + }; + s32 rel32; /* relative offset must be s32, s16, or s8 */ + void (*emulate_op)(struct kprobe *p, struct pt_regs *regs); + /* Number of bytes of text poked */ + int tp_len; +}; + +struct arch_optimized_insn { + /* copy of the original instructions */ + kprobe_opcode_t copied_insn[DISP32_SIZE]; + /* detour code buffer */ + kprobe_opcode_t *insn; + /* the size of instructions copied to detour code buffer */ + size_t size; +}; + +/* Return true (!0) if optinsn is prepared for optimization. */ +static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) +{ + return optinsn->size; +} + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_flags; + unsigned long saved_flags; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_flags; + unsigned long kprobe_saved_flags; + struct prev_kprobe prev_kprobe; +}; + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); + +#else + +static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; } + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h new file mode 100644 index 000000000..abc07d004 --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition, for example if "static_call_cond()" will be used + * at the call sites. KVM_X86_OP_OPTIONAL_RET0() can be used likewise + * to make a definition optional, but in this case the default will + * be __static_call_return0. + */ +KVM_X86_OP(hardware_enable) +KVM_X86_OP(hardware_disable) +KVM_X86_OP(hardware_unsetup) +KVM_X86_OP(has_emulated_msr) +KVM_X86_OP(vcpu_after_set_cpuid) +KVM_X86_OP(vm_init) +KVM_X86_OP_OPTIONAL(vm_destroy) +KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate) +KVM_X86_OP(vcpu_create) +KVM_X86_OP(vcpu_free) +KVM_X86_OP(vcpu_reset) +KVM_X86_OP(prepare_switch_to_guest) +KVM_X86_OP(vcpu_load) +KVM_X86_OP(vcpu_put) +KVM_X86_OP(update_exception_bitmap) +KVM_X86_OP(get_msr) +KVM_X86_OP(set_msr) +KVM_X86_OP(get_segment_base) +KVM_X86_OP(get_segment) +KVM_X86_OP(get_cpl) +KVM_X86_OP(set_segment) +KVM_X86_OP(get_cs_db_l_bits) +KVM_X86_OP(is_valid_cr0) +KVM_X86_OP(set_cr0) +KVM_X86_OP_OPTIONAL(post_set_cr3) +KVM_X86_OP(is_valid_cr4) +KVM_X86_OP(set_cr4) +KVM_X86_OP(set_efer) +KVM_X86_OP(get_idt) +KVM_X86_OP(set_idt) +KVM_X86_OP(get_gdt) +KVM_X86_OP(set_gdt) +KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr7) +KVM_X86_OP(cache_reg) +KVM_X86_OP(get_rflags) +KVM_X86_OP(set_rflags) +KVM_X86_OP(get_if_flag) +KVM_X86_OP(flush_tlb_all) +KVM_X86_OP(flush_tlb_current) +KVM_X86_OP_OPTIONAL(tlb_remote_flush) +KVM_X86_OP_OPTIONAL(tlb_remote_flush_with_range) +KVM_X86_OP(flush_tlb_gva) +KVM_X86_OP(flush_tlb_guest) +KVM_X86_OP(vcpu_pre_run) +KVM_X86_OP(vcpu_run) +KVM_X86_OP(handle_exit) +KVM_X86_OP(skip_emulated_instruction) +KVM_X86_OP_OPTIONAL(update_emulated_instruction) +KVM_X86_OP(set_interrupt_shadow) +KVM_X86_OP(get_interrupt_shadow) +KVM_X86_OP(patch_hypercall) +KVM_X86_OP(inject_irq) +KVM_X86_OP(inject_nmi) +KVM_X86_OP(inject_exception) +KVM_X86_OP(cancel_injection) +KVM_X86_OP(interrupt_allowed) +KVM_X86_OP(nmi_allowed) +KVM_X86_OP(get_nmi_mask) +KVM_X86_OP(set_nmi_mask) +KVM_X86_OP(enable_nmi_window) +KVM_X86_OP(enable_irq_window) +KVM_X86_OP_OPTIONAL(update_cr8_intercept) +KVM_X86_OP(check_apicv_inhibit_reasons) +KVM_X86_OP(refresh_apicv_exec_ctrl) +KVM_X86_OP_OPTIONAL(hwapic_irr_update) +KVM_X86_OP_OPTIONAL(hwapic_isr_update) +KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt) +KVM_X86_OP_OPTIONAL(load_eoi_exitmap) +KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) +KVM_X86_OP_OPTIONAL(set_apic_access_page_addr) +KVM_X86_OP(deliver_interrupt) +KVM_X86_OP_OPTIONAL(sync_pir_to_irr) +KVM_X86_OP_OPTIONAL_RET0(set_tss_addr) +KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr) +KVM_X86_OP_OPTIONAL_RET0(get_mt_mask) +KVM_X86_OP(load_mmu_pgd) +KVM_X86_OP(has_wbinvd_exit) +KVM_X86_OP(get_l2_tsc_offset) +KVM_X86_OP(get_l2_tsc_multiplier) +KVM_X86_OP(write_tsc_offset) +KVM_X86_OP(write_tsc_multiplier) +KVM_X86_OP(get_exit_info) +KVM_X86_OP(check_intercept) +KVM_X86_OP(handle_exit_irqoff) +KVM_X86_OP(request_immediate_exit) +KVM_X86_OP(sched_in) +KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging) +KVM_X86_OP_OPTIONAL(vcpu_blocking) +KVM_X86_OP_OPTIONAL(vcpu_unblocking) +KVM_X86_OP_OPTIONAL(pi_update_irte) +KVM_X86_OP_OPTIONAL(pi_start_assignment) +KVM_X86_OP_OPTIONAL(apicv_pre_state_restore) +KVM_X86_OP_OPTIONAL(apicv_post_state_restore) +KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) +KVM_X86_OP_OPTIONAL(set_hv_timer) +KVM_X86_OP_OPTIONAL(cancel_hv_timer) +KVM_X86_OP(setup_mce) +KVM_X86_OP(smi_allowed) +KVM_X86_OP(enter_smm) +KVM_X86_OP(leave_smm) +KVM_X86_OP(enable_smi_window) +KVM_X86_OP_OPTIONAL(mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(mem_enc_register_region) +KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) +KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) +KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) +KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) +KVM_X86_OP(get_msr_feature) +KVM_X86_OP(can_emulate_instruction) +KVM_X86_OP(apic_init_signal_blocked) +KVM_X86_OP_OPTIONAL(enable_direct_tlbflush) +KVM_X86_OP_OPTIONAL(migrate_timers) +KVM_X86_OP(msr_filter_changed) +KVM_X86_OP(complete_emulated_msr) +KVM_X86_OP(vcpu_deliver_sipi_vector) +KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); + +#undef KVM_X86_OP +#undef KVM_X86_OP_OPTIONAL +#undef KVM_X86_OP_OPTIONAL_RET0 diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h new file mode 100644 index 000000000..c17e3e96f --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition, for example if "static_call_cond()" will be used + * at the call sites. + */ +KVM_X86_PMU_OP(hw_event_available) +KVM_X86_PMU_OP(pmc_is_enabled) +KVM_X86_PMU_OP(pmc_idx_to_pmc) +KVM_X86_PMU_OP(rdpmc_ecx_to_pmc) +KVM_X86_PMU_OP(msr_idx_to_pmc) +KVM_X86_PMU_OP(is_valid_rdpmc_ecx) +KVM_X86_PMU_OP(is_valid_msr) +KVM_X86_PMU_OP(get_msr) +KVM_X86_PMU_OP(set_msr) +KVM_X86_PMU_OP(refresh) +KVM_X86_PMU_OP(init) +KVM_X86_PMU_OP(reset) +KVM_X86_PMU_OP_OPTIONAL(deliver_pmi) +KVM_X86_PMU_OP_OPTIONAL(cleanup) + +#undef KVM_X86_PMU_OP +#undef KVM_X86_PMU_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h new file mode 100644 index 000000000..dfcdcafe3 --- /dev/null +++ b/arch/x86/include/asm/kvm_host.h @@ -0,0 +1,2118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This header defines architecture specific interfaces, x86 version + */ + +#ifndef _ASM_X86_KVM_HOST_H +#define _ASM_X86_KVM_HOST_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + +#define KVM_MAX_VCPUS 1024 + +/* + * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs + * might be larger than the actual number of VCPUs because the + * APIC ID encodes CPU topology information. + * + * In the worst case, we'll need less than one extra bit for the + * Core ID, and less than one extra bit for the Package (Die) ID, + * so ratio of 4 should be enough. + */ +#define KVM_VCPU_ID_RATIO 4 +#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) + +/* memory slots that are not exposed to userspace */ +#define KVM_INTERNAL_MEM_SLOTS 3 + +#define KVM_HALT_POLL_NS_DEFAULT 200000 + +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS + +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + +#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ + KVM_BUS_LOCK_DETECTION_EXIT) + +#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ + KVM_X86_NOTIFY_VMEXIT_USER) + +/* x86-specific vcpu->requests bit members */ +#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) +#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) +#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) +#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) +#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) +#define KVM_REQ_EVENT KVM_ARCH_REQ(6) +#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) +#define KVM_REQ_NMI KVM_ARCH_REQ(9) +#define KVM_REQ_PMU KVM_ARCH_REQ(10) +#define KVM_REQ_PMI KVM_ARCH_REQ(11) +#define KVM_REQ_SMI KVM_ARCH_REQ(12) +#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) +#define KVM_REQ_MCLOCK_INPROGRESS \ + KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SCAN_IOAPIC \ + KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) +#define KVM_REQ_APIC_PAGE_RELOAD \ + KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) +#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) +#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) +#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) +#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) +#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) +#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_APICV_UPDATE \ + KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_TLB_FLUSH_GUEST \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) +#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) +#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ + KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ + KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) + +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ + | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ + | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + + +#define INVALID_PAGE (~(hpa_t)0) +#define VALID_PAGE(x) ((x) != INVALID_PAGE) + +#define INVALID_GPA (~(gpa_t)0) + +/* KVM Hugepage definitions for x86 */ +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) +#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) +#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) +#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) +#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) +#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) + +#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL +#define KVM_MMU_HASH_SHIFT 12 +#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 256 +#define KVM_NR_FIXED_MTRR_REGION 88 +#define KVM_NR_VAR_MTRR 8 + +#define ASYNC_PF_PER_VCPU 64 + +enum kvm_reg { + VCPU_REGS_RAX = __VCPU_REGS_RAX, + VCPU_REGS_RCX = __VCPU_REGS_RCX, + VCPU_REGS_RDX = __VCPU_REGS_RDX, + VCPU_REGS_RBX = __VCPU_REGS_RBX, + VCPU_REGS_RSP = __VCPU_REGS_RSP, + VCPU_REGS_RBP = __VCPU_REGS_RBP, + VCPU_REGS_RSI = __VCPU_REGS_RSI, + VCPU_REGS_RDI = __VCPU_REGS_RDI, +#ifdef CONFIG_X86_64 + VCPU_REGS_R8 = __VCPU_REGS_R8, + VCPU_REGS_R9 = __VCPU_REGS_R9, + VCPU_REGS_R10 = __VCPU_REGS_R10, + VCPU_REGS_R11 = __VCPU_REGS_R11, + VCPU_REGS_R12 = __VCPU_REGS_R12, + VCPU_REGS_R13 = __VCPU_REGS_R13, + VCPU_REGS_R14 = __VCPU_REGS_R14, + VCPU_REGS_R15 = __VCPU_REGS_R15, +#endif + VCPU_REGS_RIP, + NR_VCPU_REGS, + + VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, + VCPU_EXREG_CR3, + VCPU_EXREG_CR4, + VCPU_EXREG_RFLAGS, + VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, +}; + +enum { + VCPU_SREG_ES, + VCPU_SREG_CS, + VCPU_SREG_SS, + VCPU_SREG_DS, + VCPU_SREG_FS, + VCPU_SREG_GS, + VCPU_SREG_TR, + VCPU_SREG_LDTR, +}; + +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, +}; +typedef enum exit_fastpath_completion fastpath_t; + +struct x86_emulate_ctxt; +struct x86_exception; +enum x86_intercept; +enum x86_intercept_stage; + +#define KVM_NR_DB_REGS 4 + +#define DR6_BUS_LOCK (1 << 11) +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) +#define DR6_RTM (1 << 16) +/* + * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. + * We can regard all the bits in DR6_FIXED_1 as active_low bits; + * they will never be 0 for now, but when they are defined + * in the future it will require no code change. + * + * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. + */ +#define DR6_ACTIVE_LOW 0xffff0ff0 +#define DR6_VOLATILE 0x0001e80f +#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) + +#define DR7_BP_EN_MASK 0x000000ff +#define DR7_GE (1 << 9) +#define DR7_GD (1 << 13) +#define DR7_FIXED_1 0x00000400 +#define DR7_VOLATILE 0xffff2bff + +#define KVM_GUESTDBG_VALID_MASK \ + (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_SINGLESTEP | \ + KVM_GUESTDBG_USE_HW_BP | \ + KVM_GUESTDBG_USE_SW_BP | \ + KVM_GUESTDBG_INJECT_BP | \ + KVM_GUESTDBG_INJECT_DB | \ + KVM_GUESTDBG_BLOCKIRQ) + + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) +#define PFERR_USER_MASK (1U << PFERR_USER_BIT) +#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) +#define PFERR_PK_MASK (1U << PFERR_PK_BIT) +#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT) + +#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ + PFERR_WRITE_MASK | \ + PFERR_PRESENT_MASK) + +/* apic attention bits */ +#define KVM_APIC_CHECK_VAPIC 0 +/* + * The following bit is set with PV-EOI, unset on EOI. + * We detect PV-EOI changes by guest by comparing + * this bit with PV-EOI in guest memory. + * See the implementation in apic_update_pv_eoi. + */ +#define KVM_APIC_PV_EOI_PENDING 1 + +struct kvm_kernel_irq_routing_entry; + +/* + * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page + * also includes TDP pages) to determine whether or not a page can be used in + * the given MMU context. This is a subset of the overall kvm_cpu_role to + * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating + * 2 bytes per gfn instead of 4 bytes per gfn. + * + * Upper-level shadow pages having gptes are tracked for write-protection via + * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create + * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise + * gfn_track will overflow and explosions will ensure. + * + * A unique shadow page (SP) for a gfn is created if and only if an existing SP + * cannot be reused. The ability to reuse a SP is tracked by its role, which + * incorporates various mode bits and properties of the SP. Roughly speaking, + * the number of unique SPs that can theoretically be created is 2^n, where n + * is the number of bits that are used to compute the role. + * + * But, even though there are 19 bits in the mask below, not all combinations + * of modes and flags are possible: + * + * - invalid shadow pages are not accounted, so the bits are effectively 18 + * + * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); + * execonly and ad_disabled are only used for nested EPT which has + * has_4_byte_gpte=0. Therefore, 2 bits are always unused. + * + * - the 4 bits of level are effectively limited to the values 2/3/4/5, + * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE + * paging has exactly one upper level, making level completely redundant + * when has_4_byte_gpte=1. + * + * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if + * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. + * + * Therefore, the maximum number of possible upper-level shadow pages for a + * single gfn is a bit less than 2^13. + */ +union kvm_mmu_page_role { + u32 word; + struct { + unsigned level:4; + unsigned has_4_byte_gpte:1; + unsigned quadrant:2; + unsigned direct:1; + unsigned access:3; + unsigned invalid:1; + unsigned efer_nx:1; + unsigned cr0_wp:1; + unsigned smep_andnot_wp:1; + unsigned smap_andnot_wp:1; + unsigned ad_disabled:1; + unsigned guest_mode:1; + unsigned passthrough:1; + unsigned :5; + + /* + * This is left at the top of the word so that + * kvm_memslots_for_spte_role can extract it with a + * simple shift. While there is room, give it a whole + * byte so it is also faster to load it from memory. + */ + unsigned smm:8; + }; +}; + +/* + * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties + * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, + * including on nested transitions, if nothing in the full role changes then + * MMU re-configuration can be skipped. @valid bit is set on first usage so we + * don't treat all-zero structure as valid data. + * + * The properties that are tracked in the extended role but not the page role + * are for things that either (a) do not affect the validity of the shadow page + * or (b) are indirectly reflected in the shadow page's role. For example, + * CR4.PKE only affects permission checks for software walks of the guest page + * tables (because KVM doesn't support Protection Keys with shadow paging), and + * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. + * + * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. + * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and + * SMAP, but the MMU's permission checks for software walks need to be SMEP and + * SMAP aware regardless of CR0.WP. + */ +union kvm_mmu_extended_role { + u32 word; + struct { + unsigned int valid:1; + unsigned int execonly:1; + unsigned int cr4_pse:1; + unsigned int cr4_pke:1; + unsigned int cr4_smap:1; + unsigned int cr4_smep:1; + unsigned int cr4_la57:1; + unsigned int efer_lma:1; + }; +}; + +union kvm_cpu_role { + u64 as_u64; + struct { + union kvm_mmu_page_role base; + union kvm_mmu_extended_role ext; + }; +}; + +struct kvm_rmap_head { + unsigned long val; +}; + +struct kvm_pio_request { + unsigned long linear_rip; + unsigned long count; + int in; + int port; + int size; +}; + +#define PT64_ROOT_MAX_LEVEL 5 + +struct rsvd_bits_validate { + u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; + u64 bad_mt_xwr; +}; + +struct kvm_mmu_root_info { + gpa_t pgd; + hpa_t hpa; +}; + +#define KVM_MMU_ROOT_INFO_INVALID \ + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) + +#define KVM_MMU_NUM_PREV_ROOTS 3 + +#define KVM_HAVE_MMU_RWLOCK + +struct kvm_mmu_page; +struct kvm_page_fault; + +/* + * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, + * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the + * current mmu mode. + */ +struct kvm_mmu { + unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); + u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); + int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gpa_t gva_or_gpa, u64 access, + struct x86_exception *exception); + int (*sync_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); + struct kvm_mmu_root_info root; + union kvm_cpu_role cpu_role; + union kvm_mmu_page_role root_role; + + /* + * The pkru_mask indicates if protection key checks are needed. It + * consists of 16 domains indexed by page fault error code bits [4:1], + * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. + * Each domain has 2 bits which are ANDed with AD and WD from PKRU. + */ + u32 pkru_mask; + + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; + + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + + u64 *pae_root; + u64 *pml4_root; + u64 *pml5_root; + + /* + * check zero bits on shadow page table entries, these + * bits include not only hardware reserved bits but also + * the bits spte never used. + */ + struct rsvd_bits_validate shadow_zero_check; + + struct rsvd_bits_validate guest_rsvd_check; + + u64 pdptrs[4]; /* pae */ +}; + +struct kvm_tlb_range { + u64 start_gfn; + u64 pages; +}; + +enum pmc_type { + KVM_PMC_GP = 0, + KVM_PMC_FIXED, +}; + +struct kvm_pmc { + enum pmc_type type; + u8 idx; + u64 counter; + u64 eventsel; + struct perf_event *perf_event; + struct kvm_vcpu *vcpu; + /* + * eventsel value for general purpose counters, + * ctrl value for fixed counters. + */ + u64 current_config; + bool is_paused; + bool intr; +}; + +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_INTEL_PMC_MAX_GENERIC 8 +#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) +#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) +#define KVM_PMC_MAX_FIXED 3 +#define KVM_AMD_PMC_MAX_GENERIC 6 +struct kvm_pmu { + unsigned nr_arch_gp_counters; + unsigned nr_arch_fixed_counters; + unsigned available_event_types; + u64 fixed_ctr_ctrl; + u64 fixed_ctr_ctrl_mask; + u64 global_ctrl; + u64 global_status; + u64 counter_bitmask[2]; + u64 global_ctrl_mask; + u64 global_ovf_ctrl_mask; + u64 reserved_bits; + u64 raw_event_mask; + u8 version; + struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; + struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; + struct irq_work irq_work; + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + + u64 ds_area; + u64 pebs_enable; + u64 pebs_enable_mask; + u64 pebs_data_cfg; + u64 pebs_data_cfg_mask; + + /* + * If a guest counter is cross-mapped to host counter with different + * index, its PEBS capability will be temporarily disabled. + * + * The user should make sure that this mask is updated + * after disabling interrupts and before perf_guest_get_msrs(); + */ + u64 host_cross_mapped_mask; + + /* + * The gate to release perf_events not marked in + * pmc_in_use only once in a vcpu time slice. + */ + bool need_cleanup; + + /* + * The total number of programmed perf_events and it helps to avoid + * redundant check before cleanup if guest don't use vPMU at all. + */ + u8 event_count; +}; + +struct kvm_pmu_ops; + +enum { + KVM_DEBUGREG_BP_ENABLED = 1, + KVM_DEBUGREG_WONT_EXIT = 2, +}; + +struct kvm_mtrr_range { + u64 base; + u64 mask; + struct list_head node; +}; + +struct kvm_mtrr { + struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR]; + mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION]; + u64 deftype; + + struct list_head head; +}; + +/* Hyper-V SynIC timer */ +struct kvm_vcpu_hv_stimer { + struct hrtimer timer; + int index; + union hv_stimer_config config; + u64 count; + u64 exp_time; + struct hv_message msg; + bool msg_pending; +}; + +/* Hyper-V synthetic interrupt controller (SynIC)*/ +struct kvm_vcpu_hv_synic { + u64 version; + u64 control; + u64 msg_page; + u64 evt_page; + atomic64_t sint[HV_SYNIC_SINT_COUNT]; + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; + DECLARE_BITMAP(auto_eoi_bitmap, 256); + DECLARE_BITMAP(vec_bitmap, 256); + bool active; + bool dont_zero_synic_pages; +}; + +/* Hyper-V per vcpu emulation context */ +struct kvm_vcpu_hv { + struct kvm_vcpu *vcpu; + u32 vp_index; + u64 hv_vapic; + s64 runtime_offset; + struct kvm_vcpu_hv_synic synic; + struct kvm_hyperv_exit exit; + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + bool enforce_cpuid; + struct { + u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ + u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ + u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ + u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ + u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ + u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ + u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ + u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ + } cpuid_cache; +}; + +/* Xen HVM per vcpu emulation context */ +struct kvm_vcpu_xen { + u64 hypercall_rip; + u32 current_runstate; + u8 upcall_vector; + struct gfn_to_pfn_cache vcpu_info_cache; + struct gfn_to_pfn_cache vcpu_time_info_cache; + struct gfn_to_pfn_cache runstate_cache; + u64 last_steal; + u64 runstate_entry_time; + u64 runstate_times[4]; + unsigned long evtchn_pending_sel; + u32 vcpu_id; /* The Xen / ACPI vCPU ID */ + u32 timer_virq; + u64 timer_expires; /* In guest epoch */ + atomic_t timer_pending; + struct hrtimer timer; + int poll_evtchn; + struct timer_list poll_timer; +}; + +struct kvm_queued_exception { + bool pending; + bool injected; + bool has_error_code; + u8 vector; + u32 error_code; + unsigned long payload; + bool has_payload; +}; + +struct kvm_vcpu_arch { + /* + * rip and regs accesses must go through + * kvm_{register,rip}_{read,write} functions. + */ + unsigned long regs[NR_VCPU_REGS]; + u32 regs_avail; + u32 regs_dirty; + + unsigned long cr0; + unsigned long cr0_guest_owned_bits; + unsigned long cr2; + unsigned long cr3; + unsigned long cr4; + unsigned long cr4_guest_owned_bits; + unsigned long cr4_guest_rsvd_bits; + unsigned long cr8; + u32 host_pkru; + u32 pkru; + u32 hflags; + u64 efer; + u64 apic_base; + struct kvm_lapic *apic; /* kernel irqchip context */ + bool load_eoi_exitmap_pending; + DECLARE_BITMAP(ioapic_handled_vectors, 256); + unsigned long apic_attention; + int32_t apic_arb_prio; + int mp_state; + u64 ia32_misc_enable_msr; + u64 smbase; + u64 smi_count; + bool at_instruction_boundary; + bool tpr_access_reporting; + bool xsaves_enabled; + bool xfd_no_write_intercept; + u64 ia32_xss; + u64 microcode_version; + u64 arch_capabilities; + u64 perf_capabilities; + + /* + * Paging state of the vcpu + * + * If the vcpu runs in guest mode with two level paging this still saves + * the paging mode of the l1 guest. This context is always used to + * handle faults. + */ + struct kvm_mmu *mmu; + + /* Non-nested MMU for L1 */ + struct kvm_mmu root_mmu; + + /* L1 MMU when running nested */ + struct kvm_mmu guest_mmu; + + /* + * Paging state of an L2 guest (used for nested npt) + * + * This context will save all necessary information to walk page tables + * of an L2 guest. This context is only initialized for page table + * walking and not for faulting since we never handle l2 page faults on + * the host. + */ + struct kvm_mmu nested_mmu; + + /* + * Pointer to the mmu context currently used for + * gva_to_gpa translations. + */ + struct kvm_mmu *walk_mmu; + + struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; + struct kvm_mmu_memory_cache mmu_shadow_page_cache; + struct kvm_mmu_memory_cache mmu_shadowed_info_cache; + struct kvm_mmu_memory_cache mmu_page_header_cache; + + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpstate" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu_guest guest_fpu; + + u64 xcr0; + u64 guest_supported_xcr0; + + struct kvm_pio_request pio; + void *pio_data; + void *sev_pio_data; + unsigned sev_pio_count; + + u8 event_exit_inst_len; + + bool exception_from_userspace; + + /* Exceptions to be injected to the guest. */ + struct kvm_queued_exception exception; + /* Exception VM-Exits to be synthesized to L1. */ + struct kvm_queued_exception exception_vmexit; + + struct kvm_queued_interrupt { + bool injected; + bool soft; + u8 nr; + } interrupt; + + int halt_request; /* real mode on Intel only */ + + int cpuid_nent; + struct kvm_cpuid_entry2 *cpuid_entries; + u32 kvm_cpuid_base; + + u64 reserved_gpa_bits; + int maxphyaddr; + + /* emulate context */ + + struct x86_emulate_ctxt *emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; + int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + + gpa_t time; + struct pvclock_vcpu_time_info hv_clock; + unsigned int hw_tsc_khz; + struct gfn_to_pfn_cache pv_time; + /* set guest stopped flag in pvclock flags field */ + bool pvclock_set_guest_stopped_request; + + struct { + u8 preempted; + u64 msr_val; + u64 last_steal; + struct gfn_to_hva_cache cache; + } st; + + u64 l1_tsc_offset; + u64 tsc_offset; /* current tsc offset */ + u64 last_guest_tsc; + u64 last_host_tsc; + u64 tsc_offset_adjustment; + u64 this_tsc_nsec; + u64 this_tsc_write; + u64 this_tsc_generation; + bool tsc_catchup; + bool tsc_always_catchup; + s8 virtual_tsc_shift; + u32 virtual_tsc_mult; + u32 virtual_tsc_khz; + s64 ia32_tsc_adjust_msr; + u64 msr_ia32_power_ctl; + u64 l1_tsc_scaling_ratio; + u64 tsc_scaling_ratio; /* current scaling ratio */ + + atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ + unsigned nmi_pending; /* NMI queued after currently running handler */ + bool nmi_injected; /* Trying to inject an NMI this entry */ + bool smi_pending; /* SMI queued after currently running handler */ + u8 handling_intr_from_guest; + + struct kvm_mtrr mtrr_state; + u64 pat; + + unsigned switch_db_regs; + unsigned long db[KVM_NR_DB_REGS]; + unsigned long dr6; + unsigned long dr7; + unsigned long eff_db[KVM_NR_DB_REGS]; + unsigned long guest_debug_dr7; + u64 msr_platform_info; + u64 msr_misc_features_enables; + + u64 mcg_cap; + u64 mcg_status; + u64 mcg_ctl; + u64 mcg_ext_ctl; + u64 *mce_banks; + u64 *mci_ctl2_banks; + + /* Cache MMIO info */ + u64 mmio_gva; + unsigned mmio_access; + gfn_t mmio_gfn; + u64 mmio_gen; + + struct kvm_pmu pmu; + + /* used for guest single stepping over the given code position */ + unsigned long singlestep_rip; + + bool hyperv_enabled; + struct kvm_vcpu_hv *hyperv; + struct kvm_vcpu_xen xen; + + cpumask_var_t wbinvd_dirty_mask; + + unsigned long last_retry_eip; + unsigned long last_retry_addr; + + struct { + bool halted; + gfn_t gfns[ASYNC_PF_PER_VCPU]; + struct gfn_to_hva_cache data; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; + u32 id; + bool send_user_only; + u32 host_apf_flags; + bool delivery_as_pf_vmexit; + bool pageready_pending; + } apf; + + /* OSVW MSRs (AMD only) */ + struct { + u64 length; + u64 status; + } osvw; + + struct { + u64 msr_val; + struct gfn_to_hva_cache data; + } pv_eoi; + + u64 msr_kvm_poll_control; + + /* + * Indicates the guest is trying to write a gfn that contains one or + * more of the PTEs used to translate the write itself, i.e. the access + * is changing its own translation in the guest page tables. KVM exits + * to userspace if emulation of the faulting instruction fails and this + * flag is set, as KVM cannot make forward progress. + * + * If emulation fails for a write to guest page tables, KVM unprotects + * (zaps) the shadow page for the target gfn and resumes the guest to + * retry the non-emulatable instruction (on hardware). Unprotecting the + * gfn doesn't allow forward progress for a self-changing access because + * doing so also zaps the translation for the gfn, i.e. retrying the + * instruction will hit a !PRESENT fault, which results in a new shadow + * page and sends KVM back to square one. + */ + bool write_fault_to_shadow_pgtable; + + /* set at EPT violation at this point */ + unsigned long exit_qualification; + + /* pv related host specific info */ + struct { + bool pv_unhalted; + } pv; + + int pending_ioapic_eoi; + int pending_external_vector; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; + + /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ + bool l1tf_flush_l1d; + + /* Host CPU on which VM-entry was most recently attempted */ + int last_vmentry_cpu; + + /* AMD MSRC001_0015 Hardware Configuration */ + u64 msr_hwcr; + + /* pv related cpuid info */ + struct { + /* + * value of the eax register in the KVM_CPUID_FEATURES CPUID + * leaf. + */ + u32 features; + + /* + * indicates whether pv emulation should be disabled if features + * are not present in the guest's cpuid + */ + bool enforce; + } pv_cpuid; + + /* Protected Guests */ + bool guest_state_protected; + + /* + * Set when PDPTS were loaded directly by the userspace without + * reading the guest memory + */ + bool pdptrs_from_userspace; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; +#endif +}; + +struct kvm_lpage_info { + int disallow_lpage; +}; + +struct kvm_arch_memory_slot { + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; + struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; + unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; +}; + +/* + * We use as the mode the number of bits allocated in the LDR for the + * logical processor ID. It happens that these are all powers of two. + * This makes it is very easy to detect cases where the APICs are + * configured for multiple modes; in that case, we cannot use the map and + * hence cannot use kvm_irq_delivery_to_apic_fast either. + */ +#define KVM_APIC_MODE_XAPIC_CLUSTER 4 +#define KVM_APIC_MODE_XAPIC_FLAT 8 +#define KVM_APIC_MODE_X2APIC 16 + +struct kvm_apic_map { + struct rcu_head rcu; + u8 mode; + u32 max_apic_id; + union { + struct kvm_lapic *xapic_flat_map[8]; + struct kvm_lapic *xapic_cluster_map[16][4]; + }; + struct kvm_lapic *phys_map[]; +}; + +/* Hyper-V synthetic debugger (SynDbg)*/ +struct kvm_hv_syndbg { + struct { + u64 control; + u64 status; + u64 send_page; + u64 recv_page; + u64 pending_page; + } control; + u64 options; +}; + +/* Current state of Hyper-V TSC page clocksource */ +enum hv_tsc_page_status { + /* TSC page was not set up or disabled */ + HV_TSC_PAGE_UNSET = 0, + /* TSC page MSR was written by the guest, update pending */ + HV_TSC_PAGE_GUEST_CHANGED, + /* TSC page update was triggered from the host side */ + HV_TSC_PAGE_HOST_CHANGED, + /* TSC page was properly set up and is currently active */ + HV_TSC_PAGE_SET, + /* TSC page was set up with an inaccessible GPA */ + HV_TSC_PAGE_BROKEN, +}; + +/* Hyper-V emulation context */ +struct kvm_hv { + struct mutex hv_lock; + u64 hv_guest_os_id; + u64 hv_hypercall; + u64 hv_tsc_page; + enum hv_tsc_page_status hv_tsc_page_status; + + /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ + u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; + u64 hv_crash_ctl; + + struct ms_hyperv_tsc_page tsc_ref; + + struct idr conn_to_evt; + + u64 hv_reenlightenment_control; + u64 hv_tsc_emulation_control; + u64 hv_tsc_emulation_status; + + /* How many vCPUs have VP index != vCPU index */ + atomic_t num_mismatched_vp_indexes; + + /* + * How many SynICs use 'AutoEOI' feature + * (protected by arch.apicv_update_lock) + */ + unsigned int synic_auto_eoi_used; + + struct hv_partition_assist_pg *hv_pa_pg; + struct kvm_hv_syndbg hv_syndbg; +}; + +struct msr_bitmap_range { + u32 flags; + u32 nmsrs; + u32 base; + unsigned long *bitmap; +}; + +/* Xen emulation context */ +struct kvm_xen { + u32 xen_version; + bool long_mode; + u8 upcall_vector; + struct gfn_to_pfn_cache shinfo_cache; + struct idr evtchn_ports; + unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; +}; + +enum kvm_irqchip_mode { + KVM_IRQCHIP_NONE, + KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ + KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ +}; + +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + +enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ + APICV_INHIBIT_REASON_DISABLE, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ + APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * For simplicity, the APIC acceleration is inhibited + * first time either APIC ID or APIC base are changed by the guest + * from their reset values. + */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ + APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ + APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ + APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is disabled because SEV doesn't support it. + */ + APICV_INHIBIT_REASON_SEV, +}; + +struct kvm_arch { + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; + unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; + struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; + struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_page_track_notifier_head track_notifier_head; + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. + */ + spinlock_t mmu_unsync_pages_lock; + + struct list_head assigned_dev_head; + struct iommu_domain *iommu_domain; + bool iommu_noncoherent; +#define __KVM_HAVE_ARCH_NONCOHERENT_DMA + atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; + struct kvm_pic *vpic; + struct kvm_ioapic *vioapic; + struct kvm_pit *vpit; + atomic_t vapics_in_nmi_mode; + struct mutex apic_map_lock; + struct kvm_apic_map __rcu *apic_map; + atomic_t apic_map_dirty; + + /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ + struct rw_semaphore apicv_update_lock; + + bool apic_access_memslot_enabled; + unsigned long apicv_inhibit_reasons; + + gpa_t wall_clock; + + bool mwait_in_guest; + bool hlt_in_guest; + bool pause_in_guest; + bool cstate_in_guest; + + unsigned long irq_sources_bitmap; + s64 kvmclock_offset; + + /* + * This also protects nr_vcpus_matched_tsc which is read from a + * preemption-disabled region, so it must be a raw spinlock. + */ + raw_spinlock_t tsc_write_lock; + u64 last_tsc_nsec; + u64 last_tsc_write; + u32 last_tsc_khz; + u64 last_tsc_offset; + u64 cur_tsc_nsec; + u64 cur_tsc_write; + u64 cur_tsc_offset; + u64 cur_tsc_generation; + int nr_vcpus_matched_tsc; + + u32 default_tsc_khz; + + seqcount_raw_spinlock_t pvclock_sc; + bool use_master_clock; + u64 master_kernel_ns; + u64 master_cycle_now; + struct delayed_work kvmclock_update_work; + struct delayed_work kvmclock_sync_work; + + struct kvm_xen_hvm_config xen_hvm_config; + + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + + struct kvm_hv hyperv; + struct kvm_xen xen; + + bool backwards_tsc_observed; + bool boot_vcpu_runs_old_kvmclock; + u32 bsp_vcpu_id; + + u64 disabled_quirks; + int cpu_dirty_logging_count; + + enum kvm_irqchip_mode irqchip_mode; + u8 nr_reserved_ioapic_pins; + + bool disabled_lapic_found; + + bool x2apic_format; + bool x2apic_broadcast_quirk_disabled; + + bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; + + bool triple_fault_event; + + bool bus_lock_detection_enabled; + bool enable_pmu; + + u32 notify_window; + u32 notify_vmexit_flags; + /* + * If exit_on_emulation_error is set, and the in-kernel instruction + * emulator fails to emulate an instruction, allow userspace + * the opportunity to look at it. + */ + bool exit_on_emulation_error; + + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ + u32 user_space_msr_mask; + struct kvm_x86_msr_filter __rcu *msr_filter; + + u32 hypercall_exit_enabled; + + /* Guest can access the SGX PROVISIONKEY. */ + bool sgx_provisioning_allowed; + + struct kvm_pmu_event_filter __rcu *pmu_event_filter; + struct task_struct *nx_lpage_recovery_thread; + +#ifdef CONFIG_X86_64 + /* + * Whether the TDP MMU is enabled for this VM. This contains a + * snapshot of the TDP MMU module parameter from when the VM was + * created and remains unchanged for the life of the VM. If this is + * true, TDP MMU handler functions will run for various MMU + * operations. + */ + bool tdp_mmu_enabled; + + /* + * List of kvm_mmu_page structs being used as roots. + * All kvm_mmu_page structs in the list should have + * tdp_mmu_page set. + * + * For reads, this list is protected by: + * the MMU lock in read mode + RCU or + * the MMU lock in write mode + * + * For writes, this list is protected by: + * the MMU lock in read mode + the tdp_mmu_pages_lock or + * the MMU lock in write mode + * + * Roots will remain in the list until their tdp_mmu_root_count + * drops to zero, at which point the thread that decremented the + * count to zero should removed the root from the list and clean + * it up, freeing the root after an RCU grace period. + */ + struct list_head tdp_mmu_roots; + + /* + * List of kvm_mmu_page structs not being used as roots. + * All kvm_mmu_page structs in the list should have + * tdp_mmu_page set and a tdp_mmu_root_count of 0. + */ + struct list_head tdp_mmu_pages; + + /* + * Protects accesses to the following fields when the MMU lock + * is held in read mode: + * - tdp_mmu_roots (above) + * - tdp_mmu_pages (above) + * - the link field of kvm_mmu_page structs used by the TDP MMU + * - lpage_disallowed_mmu_pages + * - the lpage_disallowed_link field of kvm_mmu_page structs used + * by the TDP MMU + * It is acceptable, but not necessary, to acquire this lock when + * the thread holds the MMU lock in write mode. + */ + spinlock_t tdp_mmu_pages_lock; +#endif /* CONFIG_X86_64 */ + + /* + * If set, at least one shadow root has been allocated. This flag + * is used as one input when determining whether certain memslot + * related allocations are necessary. + */ + bool shadow_root_allocated; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; + spinlock_t hv_root_tdp_lock; +#endif + /* + * VM-scope maximum vCPU ID. Used to determine the size of structures + * that increase along with the maximum vCPU ID, in which case, using + * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. + */ + u32 max_vcpu_ids; + + bool disable_nx_huge_pages; + + /* + * Memory caches used to allocate shadow pages when performing eager + * page splitting. No need for a shadowed_info_cache since eager page + * splitting only allocates direct shadow pages. + * + * Protected by kvm->slots_lock. + */ + struct kvm_mmu_memory_cache split_shadow_page_cache; + struct kvm_mmu_memory_cache split_page_header_cache; + + /* + * Memory cache used to allocate pte_list_desc structs while splitting + * huge pages. In the worst case, to split one huge page, 512 + * pte_list_desc structs are needed to add each lower level leaf sptep + * to the rmap plus 1 to extend the parent_ptes rmap of the lower level + * page table. + * + * Protected by kvm->slots_lock. + */ +#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) + struct kvm_mmu_memory_cache split_desc_cache; +}; + +struct kvm_vm_stat { + struct kvm_vm_stat_generic generic; + u64 mmu_shadow_zapped; + u64 mmu_pte_write; + u64 mmu_pde_zapped; + u64 mmu_flooded; + u64 mmu_recycled; + u64 mmu_cache_miss; + u64 mmu_unsync; + union { + struct { + atomic64_t pages_4k; + atomic64_t pages_2m; + atomic64_t pages_1g; + }; + atomic64_t pages[KVM_NR_PAGE_SIZES]; + }; + u64 nx_lpage_splits; + u64 max_mmu_page_hash_collisions; + u64 max_mmu_rmap_size; +}; + +struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; + u64 pf_taken; + u64 pf_fixed; + u64 pf_emulate; + u64 pf_spurious; + u64 pf_fast; + u64 pf_mmio_spte_created; + u64 pf_guest; + u64 tlb_flush; + u64 invlpg; + + u64 exits; + u64 io_exits; + u64 mmio_exits; + u64 signal_exits; + u64 irq_window_exits; + u64 nmi_window_exits; + u64 l1d_flush; + u64 halt_exits; + u64 request_irq_exits; + u64 irq_exits; + u64 host_state_reload; + u64 fpu_reload; + u64 insn_emulation; + u64 insn_emulation_fail; + u64 hypercalls; + u64 irq_injections; + u64 nmi_injections; + u64 req_event; + u64 nested_run; + u64 directed_yield_attempted; + u64 directed_yield_successful; + u64 preemption_reported; + u64 preemption_other; + u64 guest_mode; + u64 notify_window_exits; +}; + +struct x86_instruction_info; + +struct msr_data { + bool host_initiated; + u32 index; + u64 data; +}; + +struct kvm_lapic_irq { + u32 vector; + u16 delivery_mode; + u16 dest_mode; + bool level; + u16 trig_mode; + u32 shorthand; + u32 dest_id; + bool msi_redir_hint; +}; + +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + +struct kvm_x86_ops { + const char *name; + + int (*hardware_enable)(void); + void (*hardware_disable)(void); + void (*hardware_unsetup)(void); + bool (*has_emulated_msr)(struct kvm *kvm, u32 index); + void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); + + unsigned int vm_size; + int (*vm_init)(struct kvm *kvm); + void (*vm_destroy)(struct kvm *kvm); + + /* Create, but do not attach this VCPU */ + int (*vcpu_precreate)(struct kvm *kvm); + int (*vcpu_create)(struct kvm_vcpu *vcpu); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); + + void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); + int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); + int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); + u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); + void (*get_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + int (*get_cpl)(struct kvm_vcpu *vcpu); + void (*set_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); + void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); + unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + bool (*get_if_flag)(struct kvm_vcpu *vcpu); + + void (*flush_tlb_all)(struct kvm_vcpu *vcpu); + void (*flush_tlb_current)(struct kvm_vcpu *vcpu); + int (*tlb_remote_flush)(struct kvm *kvm); + int (*tlb_remote_flush_with_range)(struct kvm *kvm, + struct kvm_tlb_range *range); + + /* + * Flush any TLB entries associated with the given GVA. + * Does not need to flush GPA->HPA mappings. + * Can potentially get non-canonical addresses through INVLPGs, which + * the implementation may choose to ignore if appropriate. + */ + void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); + + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. + */ + void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); + + int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); + enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); + int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); + void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); + void (*inject_nmi)(struct kvm_vcpu *vcpu); + void (*inject_exception)(struct kvm_vcpu *vcpu); + void (*cancel_injection)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); + void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); + void (*hwapic_isr_update)(int isr); + bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); + void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); + u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + + void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, + int root_level); + + bool (*has_wbinvd_exit)(void); + + u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); + u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); + + /* + * Retrieve somewhat arbitrary exit information. Intended to + * be used only from within tracepoints or error paths. + */ + void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, + u32 *exit_int_info, u32 *exit_int_info_err_code); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage, + struct x86_exception *exception); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); + + void (*request_immediate_exit)(struct kvm_vcpu *vcpu); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); + + /* + * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero + * value indicates CPU dirty logging is unsupported or disabled. + */ + int cpu_dirty_log_size; + void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); + + const struct kvm_x86_nested_ops *nested_ops; + + void (*vcpu_blocking)(struct kvm_vcpu *vcpu); + void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); + + int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); + void (*pi_start_assignment)(struct kvm *kvm); + void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); + void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + + int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); + void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); + + void (*setup_mce)(struct kvm_vcpu *vcpu); + + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); + int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); + + int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); + int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + void (*guest_memory_reclaimed)(struct kvm *kvm); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); + + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); + + bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); + void (*msr_filter_changed)(struct kvm_vcpu *vcpu); + int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); + + /* + * Returns vCPU specific APICv inhibit reasons + */ + unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); +}; + +struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); + bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code); + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*has_events)(struct kvm_vcpu *vcpu); + void (*triple_fault)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); +}; + +struct kvm_x86_init_ops { + int (*cpu_has_kvm_support)(void); + int (*disabled_by_bios)(void); + int (*check_processor_compatibility)(void); + int (*hardware_setup)(void); + unsigned int (*handle_intel_pt_intr)(void); + + struct kvm_x86_ops *runtime_ops; + struct kvm_pmu_ops *pmu_ops; +}; + +struct kvm_arch_async_pf { + u32 token; + gfn_t gfn; + unsigned long cr3; + bool direct_map; +}; + +extern u32 __read_mostly kvm_nr_uret_msrs; +extern u64 __read_mostly host_efer; +extern bool __read_mostly allow_smaller_maxphyaddr; +extern bool __read_mostly enable_apicv; +extern struct kvm_x86_ops kvm_x86_ops; + +#define KVM_X86_OP(func) \ + DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); +#define KVM_X86_OP_OPTIONAL KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP +#include + +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); +void kvm_x86_vendor_exit(void); + +#define __KVM_HAVE_ARCH_VM_ALLOC +static inline struct kvm *kvm_arch_alloc_vm(void) +{ + return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); +} + +#define __KVM_HAVE_ARCH_VM_FREE +void kvm_arch_free_vm(struct kvm *kvm); + +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB +static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +{ + if (kvm_x86_ops.tlb_remote_flush && + !static_call(kvm_x86_tlb_remote_flush)(kvm)) + return 0; + else + return -ENOTSUPP; +} + +#define kvm_arch_pmi_in_guest(vcpu) \ + ((vcpu) && (vcpu)->arch.handling_intr_from_guest) + +void __init kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu); +int kvm_mmu_create(struct kvm_vcpu *vcpu); +void kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_uninit_vm(struct kvm *kvm); + +void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); +void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + int start_level); +void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + int target_level); +void kvm_mmu_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + u64 start, u64 end, + int target_level); +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + const struct kvm_memory_slot *memslot); +void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, + const struct kvm_memory_slot *memslot); +void kvm_mmu_zap_all(struct kvm *kvm); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); + +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes); + +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + +extern bool tdp_enabled; + +u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); + +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops.skip_emulated_instruction() implementations if + * EMULTYPE_COMPLETE_USER_EXIT is not set. + * + * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to + * retry native execution under certain conditions, + * Can only be set in conjunction with EMULTYPE_PF. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. + * + * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which + * case the CR2/GPA value pass on the stack is valid. + * + * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility + * state and inject single-step #DBs after skipping + * an instruction (after completing userspace I/O). + */ +#define EMULTYPE_NO_DECODE (1 << 0) +#define EMULTYPE_TRAP_UD (1 << 1) +#define EMULTYPE_SKIP (1 << 2) +#define EMULTYPE_ALLOW_RETRY_PF (1 << 3) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) +#define EMULTYPE_PF (1 << 6) +#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) + +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); + +void kvm_enable_efer_bits(u64); +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); +int kvm_emulate_invd(struct kvm_vcpu *vcpu); +int kvm_emulate_mwait(struct kvm_vcpu *vcpu); +int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); +int kvm_emulate_monitor(struct kvm_vcpu *vcpu); + +int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); +int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); + +void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); + +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, + int reason, bool has_error_code, u32 error_code); + +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); +void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); + +int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); +int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); + +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); + +void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); +bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); + +static inline int __kvm_irq_line_state(unsigned long *irq_state, + int irq_source_id, int level) +{ + /* Logical OR for level trig interrupt */ + if (level) + __set_bit(irq_source_id, irq_state); + else + __clear_bit(irq_source_id, irq_state); + + return !!(*irq_state); +} + +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (~0UL) + +int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); +void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); + +void kvm_inject_nmi(struct kvm_vcpu *vcpu); + +void kvm_update_dr7(struct kvm_vcpu *vcpu); + +int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); +void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, + ulong roots_to_free); +void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); +gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); + +bool kvm_apicv_activated(struct kvm *kvm); +bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); +void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); + +static inline void kvm_set_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, true); +} + +static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, false); +} + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, + void *insn, int insn_len); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa); +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); + +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); + +static inline u16 kvm_read_ldt(void) +{ + u16 ldt; + asm("sldt %0" : "=g"(ldt)); + return ldt; +} + +static inline void kvm_load_ldt(u16 sel) +{ + asm("lldt %0" : : "rm"(sel)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long read_msr(unsigned long msr) +{ + u64 value; + + rdmsrl(msr, value); + return value; +} +#endif + +static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) +{ + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); +} + +#define TSS_IOPB_BASE_OFFSET 0x66 +#define TSS_BASE_SIZE 0x68 +#define TSS_IOPB_SIZE (65536 / 8) +#define TSS_REDIRECTION_SIZE (256 / 8) +#define RMODE_TSS_SIZE \ + (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) + +enum { + TASK_SWITCH_CALL = 0, + TASK_SWITCH_IRET = 1, + TASK_SWITCH_JMP = 2, + TASK_SWITCH_GATE = 3, +}; + +#define HF_GIF_MASK (1 << 0) +#define HF_NMI_MASK (1 << 3) +#define HF_IRET_MASK (1 << 4) +#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ +#define HF_SMM_MASK (1 << 6) +#define HF_SMM_INSIDE_NMI_MASK (1 << 7) + +#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +#define KVM_ADDRESS_SPACE_NUM 2 + +#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) +#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) + +#define KVM_ARCH_WANT_MMU_NOTIFIER + +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); + +int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, + unsigned long ipi_bitmap_high, u32 min, + unsigned long icr, int op_64_bit); + +int kvm_add_user_return_msr(u32 msr); +int kvm_find_user_return_msr(u32 msr); +int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); + +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + +u64 kvm_scale_tsc(u64 tsc, u64 ratio); +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); +u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); +u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); + +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); + +void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); + +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); +extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); + +int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); +int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); + +void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size); +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); + +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu); + +void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq); + +static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) +{ + /* We can only post Fixed and LowPrio IRQs */ + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); +} + +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + static_call_cond(kvm_x86_vcpu_blocking)(vcpu); +} + +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + static_call_cond(kvm_x86_vcpu_unblocking)(vcpu); +} + +static inline int kvm_cpu_get_apicid(int mps_cpu) +{ +#ifdef CONFIG_X86_LOCAL_APIC + return default_cpu_present_to_apicid(mps_cpu); +#else + WARN_ON_ONCE(1); + return BAD_APICID; +#endif +} + +#define put_smstate(type, buf, offset, val) \ + *(type *)((buf) + (offset) - 0x7e00) = val + +#define GET_SMSTATE(type, buf, offset) \ + (*(type *)((buf) + (offset) - 0x7e00)) + +int kvm_cpu_dirty_log_size(void); + +int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); + +#define KVM_CLOCK_VALID_FLAGS \ + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) + +#define KVM_X86_VALID_QUIRKS \ + (KVM_X86_QUIRK_LINT0_REENABLED | \ + KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ + KVM_X86_QUIRK_OUT_7E_INC_RIP | \ + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ + KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ + KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) + +#endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h new file mode 100644 index 000000000..eb186bc57 --- /dev/null +++ b/arch/x86/include/asm/kvm_page_track.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_PAGE_TRACK_H +#define _ASM_X86_KVM_PAGE_TRACK_H + +enum kvm_page_track_mode { + KVM_PAGE_TRACK_WRITE, + KVM_PAGE_TRACK_MAX, +}; + +/* + * The notifier represented by @kvm_page_track_notifier_node is linked into + * the head which will be notified when guest is triggering the track event. + * + * Write access on the head is protected by kvm->mmu_lock, read access + * is protected by track_srcu. + */ +struct kvm_page_track_notifier_head { + struct srcu_struct track_srcu; + struct hlist_head track_notifier_list; +}; + +struct kvm_page_track_notifier_node { + struct hlist_node node; + + /* + * It is called when guest is writing the write-tracked page + * and write emulation is finished at that time. + * + * @vcpu: the vcpu where the write access happened. + * @gpa: the physical address written by guest. + * @new: the data was written to the address. + * @bytes: the written length. + * @node: this node + */ + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes, struct kvm_page_track_notifier_node *node); + /* + * It is called when memory slot is being moved or removed + * users can drop write-protection for the pages in that memory slot + * + * @kvm: the kvm where memory slot being moved or removed + * @slot: the memory slot being moved or removed + * @node: this node + */ + void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot, + struct kvm_page_track_notifier_node *node); +}; + +int kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); + +bool kvm_page_track_write_tracking_enabled(struct kvm *kvm); +int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot); + +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); +int kvm_page_track_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, + unsigned long npages); + +void kvm_slot_page_track_add_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +void kvm_slot_page_track_remove_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +bool kvm_slot_page_track_is_active(struct kvm *kvm, + const struct kvm_memory_slot *slot, + gfn_t gfn, enum kvm_page_track_mode mode); + +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); +#endif diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h new file mode 100644 index 000000000..57bc74e11 --- /dev/null +++ b/arch/x86/include/asm/kvm_para.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_PARA_H +#define _ASM_X86_KVM_PARA_H + +#include +#include +#include +#include + +#include + +#ifdef CONFIG_KVM_GUEST +bool kvm_check_and_clear_guest_paused(void); +#else +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} +#endif /* CONFIG_KVM_GUEST */ + +#define KVM_HYPERCALL \ + ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL) + +/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall + * instruction. The hypervisor may replace it with something else but only the + * instructions are guaranteed to be supported. + * + * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. + * The hypercall number should be placed in rax and the return value will be + * placed in rax. No other registers will be clobbered unless explicitly + * noted by the particular hypercall. + */ + +static inline long kvm_hypercall0(unsigned int nr) +{ + long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, 0, 0, 0, 0); + + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr) + : "memory"); + return ret; +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, 0, 0, 0); + + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1) + : "memory"); + return ret; +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, 0, 0); + + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2) + : "memory"); + return ret; +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, 0); + + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, p4); + + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) + : "memory"); + return ret; +} + +static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + + asm volatile("vmmcall" + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + +#ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); +bool kvm_para_available(void); +unsigned int kvm_arch_para_features(void); +unsigned int kvm_arch_para_hints(void); +void kvm_async_pf_task_wait_schedule(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_apf_flags(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init kvm_spinlock_init(void); +#else /* !CONFIG_PARAVIRT_SPINLOCKS */ +static inline void kvm_spinlock_init(void) +{ +} +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#else /* CONFIG_KVM_GUEST */ +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) + +static inline bool kvm_para_available(void) +{ + return false; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + +static inline u32 kvm_read_and_reset_apf_flags(void) +{ + return 0; +} + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + return false; +} +#endif + +#endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h new file mode 100644 index 000000000..08f1b57d3 --- /dev/null +++ b/arch/x86/include/asm/kvm_types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_TYPES_H +#define _ASM_X86_KVM_TYPES_H + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000..1af2cb592 --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_VCPU_REGS_H +#define _ASM_X86_KVM_VCPU_REGS_H + +#define __VCPU_REGS_RAX 0 +#define __VCPU_REGS_RCX 1 +#define __VCPU_REGS_RDX 2 +#define __VCPU_REGS_RBX 3 +#define __VCPU_REGS_RSP 4 +#define __VCPU_REGS_RBP 5 +#define __VCPU_REGS_RSI 6 +#define __VCPU_REGS_RDI 7 + +#ifdef CONFIG_X86_64 +#define __VCPU_REGS_R8 8 +#define __VCPU_REGS_R9 9 +#define __VCPU_REGS_R10 10 +#define __VCPU_REGS_R11 11 +#define __VCPU_REGS_R12 12 +#define __VCPU_REGS_R13 13 +#define __VCPU_REGS_R14 14 +#define __VCPU_REGS_R15 15 +#endif + +#endif /* _ASM_X86_KVM_VCPU_REGS_H */ diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h new file mode 100644 index 000000000..6c5765192 --- /dev/null +++ b/arch/x86/include/asm/kvmclock.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_CLOCK_H +#define _ASM_X86_KVM_CLOCK_H + +#include + +extern struct clocksource kvm_clock; + +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} + +#endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h new file mode 100644 index 000000000..3a0282a6a --- /dev/null +++ b/arch/x86/include/asm/linkage.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_LINKAGE_H +#define _ASM_X86_LINKAGE_H + +#include +#include + +#undef notrace +#define notrace __attribute__((no_instrument_function)) + +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. + */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + +#ifdef CONFIG_X86_32 +#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) +#endif /* CONFIG_X86_32 */ + +#ifdef __ASSEMBLY__ + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) +#define __ALIGN .p2align 4, 0x90 +#define __ALIGN_STR __stringify(__ALIGN) +#endif + +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ +#ifdef CONFIG_SLS +#define RET ret; int3 +#else +#define RET ret +#endif +#endif /* CONFIG_RETPOLINE */ + +#else /* __ASSEMBLY__ */ + +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ +#ifdef CONFIG_SLS +#define ASM_RET "ret; int3\n\t" +#else +#define ASM_RET "ret\n\t" +#endif +#endif /* CONFIG_RETPOLINE */ + +#endif /* __ASSEMBLY__ */ + +#define __CFI_TYPE(name) \ + SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ + .fill 11, 1, 0x90 ASM_NL \ + .byte 0xb8 ASM_NL \ + .long __kcfi_typeid_##name ASM_NL \ + SYM_FUNC_END(__cfi_##name) + +/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + ENDBR + +#endif /* _ASM_X86_LINKAGE_H */ + diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h new file mode 100644 index 000000000..349a47aca --- /dev/null +++ b/arch/x86/include/asm/local.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_LOCAL_H +#define _ASM_X86_LOCAL_H + +#include + +#include +#include + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) + +static inline void local_inc(local_t *l) +{ + asm volatile(_ASM_INC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_dec(local_t *l) +{ + asm volatile(_ASM_DEC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_add(long i, local_t *l) +{ + asm volatile(_ASM_ADD "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +static inline void local_sub(long i, local_t *l) +{ + asm volatile(_ASM_SUB "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool local_sub_and_test(long i, local_t *l) +{ + return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i); +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool local_dec_and_test(local_t *l) +{ + return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e); +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool local_inc_and_test(local_t *l) +{ + return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e); +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool local_add_negative(long i, local_t *l) +{ + return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i); +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static inline long local_add_return(long i, local_t *l) +{ + long __i = i; + asm volatile(_ASM_XADD "%0, %1;" + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; +} + +static inline long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i, l); +} + +#define local_inc_return(l) (local_add_return(1, l)) +#define local_dec_return(l) (local_sub_return(1, l)) + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix */ +#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read((l)); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +/* On x86_32, these are no better than the atomic variants. + * On x86-64 these are better than the atomic variants on SMP kernels + * because they dont use a lock prefix. + */ +#define __local_inc(l) local_inc(l) +#define __local_dec(l) local_dec(l) +#define __local_add(i, l) local_add((i), (l)) +#define __local_sub(i, l) local_sub((i), (l)) + +#endif /* _ASM_X86_LOCAL_H */ diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h new file mode 100644 index 000000000..044daf6fb --- /dev/null +++ b/arch/x86/include/asm/mach_timer.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Machine specific calibrate_tsc() for generic. + * Split out from timer_tsc.c by Osamu Tomita + */ +/* ------ Calibrate the TSC ------- + * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). + * Too much 64-bit arithmetic here to do this cleanly in C, and for + * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) + * output busy loop as low as possible. We avoid reading the CTC registers + * directly because of the awkward 8-bit access mechanism of the 82C54 + * device. + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TIMER_H +#define _ASM_X86_MACH_DEFAULT_MACH_TIMER_H + +#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ +#define CALIBRATE_LATCH \ + ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) + +static inline void mach_prepare_counter(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + * + * Some devices need a delay here. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ +} + +static inline void mach_countup(unsigned long *count_p) +{ + unsigned long count = 0; + do { + count++; + } while ((inb_p(0x61) & 0x20) == 0); + *count_p = count; +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TIMER_H */ diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h new file mode 100644 index 000000000..e39a51746 --- /dev/null +++ b/arch/x86/include/asm/mach_traps.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Machine specific NMI handling for generic. + * Split out from traps.c by Osamu Tomita + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H +#define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H + +#include + +#define NMI_REASON_PORT 0x61 + +#define NMI_REASON_SERR 0x80 +#define NMI_REASON_IOCHK 0x40 +#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) + +#define NMI_REASON_CLEAR_SERR 0x04 +#define NMI_REASON_CLEAR_IOCHK 0x08 +#define NMI_REASON_CLEAR_MASK 0x0f + +static inline unsigned char default_get_nmi_reason(void) +{ + return inb(NMI_REASON_PORT); +} + +static inline void reassert_nmi(void) +{ + int old_reg = -1; + + if (do_i_have_lock_cmos()) + old_reg = current_lock_cmos_reg(); + else + lock_cmos(0); /* register doesn't matter here */ + outb(0x8f, 0x70); + inb(0x71); /* dummy */ + outb(0x0f, 0x70); + inb(0x71); /* dummy */ + if (old_reg >= 0) + outb(old_reg, 0x70); + else + unlock_cmos(); +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h new file mode 100644 index 000000000..3c4274308 --- /dev/null +++ b/arch/x86/include/asm/math_emu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MATH_EMU_H +#define _ASM_X86_MATH_EMU_H + +#include + +/* This structure matches the layout of the data saved to the stack + following a device-not-present interrupt, part of it saved + automatically by the 80386/80486. + */ +struct math_emu_info { + long ___orig_eip; + struct pt_regs *regs; +}; +#endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h new file mode 100644 index 000000000..6115bb3d5 --- /dev/null +++ b/arch/x86/include/asm/mc146818rtc.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_X86_MC146818RTC_H +#define _ASM_X86_MC146818RTC_H + +#include +#include + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +#if defined(CONFIG_X86_32) +/* + * This lock provides nmi access to the CMOS/RTC registers. It has some + * special properties. It is owned by a CPU and stores the index register + * currently being accessed (if owned). The idea here is that it works + * like a normal lock (normally). However, in an NMI, the NMI code will + * first check to see if its CPU owns the lock, meaning that the NMI + * interrupted during the read/write of the device. If it does, it goes ahead + * and performs the access and then restores the index register. If it does + * not, it locks normally. + * + * Note that since we are working with NMIs, we need this lock even in + * a non-SMP machine just to mark that the lock is owned. + * + * This only works with compare-and-swap. There is no other way to + * atomically claim the lock and set the owner. + */ +#include +extern volatile unsigned long cmos_lock; + +/* + * All of these below must be called with interrupts off, preempt + * disabled, etc. + */ + +static inline void lock_cmos(unsigned char reg) +{ + unsigned long new; + new = ((smp_processor_id() + 1) << 8) | reg; + for (;;) { + if (cmos_lock) { + cpu_relax(); + continue; + } + if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0) + return; + } +} + +static inline void unlock_cmos(void) +{ + cmos_lock = 0; +} + +static inline int do_i_have_lock_cmos(void) +{ + return (cmos_lock >> 8) == (smp_processor_id() + 1); +} + +static inline unsigned char current_lock_cmos_reg(void) +{ + return cmos_lock & 0xff; +} + +#define lock_cmos_prefix(reg) \ + do { \ + unsigned long cmos_flags; \ + local_irq_save(cmos_flags); \ + lock_cmos(reg) + +#define lock_cmos_suffix(reg) \ + unlock_cmos(); \ + local_irq_restore(cmos_flags); \ + } while (0) +#else +#define lock_cmos_prefix(reg) do {} while (0) +#define lock_cmos_suffix(reg) do {} while (0) +#define lock_cmos(reg) do { } while (0) +#define unlock_cmos() do { } while (0) +#define do_i_have_lock_cmos() 0 +#define current_lock_cmos_reg() 0 +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) rtc_cmos_read(addr) +#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr) +unsigned char rtc_cmos_read(unsigned char addr); +void rtc_cmos_write(unsigned char val, unsigned char addr); + +extern int mach_set_cmos_time(const struct timespec64 *now); +extern void mach_get_cmos_time(struct timespec64 *now); + +#define RTC_IRQ 8 + +#endif /* _ASM_X86_MC146818RTC_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h new file mode 100644 index 000000000..6e9860888 --- /dev/null +++ b/arch/x86/include/asm/mce.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MCE_H +#define _ASM_X86_MCE_H + +#include + +/* + * Machine Check support for x86 + */ + +/* MCG_CAP register defines */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ +#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ +#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ +#define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */ +#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ + +/* MCG_STATUS register defines */ +#define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */ +#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ +#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ + +/* MCG_EXT_CTL register defines */ +#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ + +/* MCi_STATUS register defines */ +#define MCI_STATUS_VAL BIT_ULL(63) /* valid error */ +#define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */ +#define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */ +#define MCI_STATUS_EN BIT_ULL(60) /* error enabled */ +#define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */ +#define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */ +#define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */ +#define MCI_STATUS_AR BIT_ULL(55) /* Action required */ +#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ +#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) +#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) +#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) + +/* AMD-specific bits */ +#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ +#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ +#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ +#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */ + +/* + * McaX field if set indicates a given bank supports MCA extensions: + * - Deferred error interrupt type is specifiable by bank. + * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, + * But should not be used to determine MSR numbers. + * - TCC bit is present in MCx_STATUS. + */ +#define MCI_CONFIG_MCAX 0x1 +#define MCI_IPID_MCATYPE 0xFFFF0000 +#define MCI_IPID_HWID 0xFFF + +/* + * Note that the full MCACOD field of IA32_MCi_STATUS MSR is + * bits 15:0. But bit 12 is the 'F' bit, defined for corrected + * errors to indicate that errors are being filtered by hardware. + * We should mask out bit 12 when looking for specific signatures + * of uncorrected errors - so the F bit is deliberately skipped + * in this #define. + */ +#define MCACOD 0xefff /* MCA Error Code */ + +/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ + +/* MCi_MISC register defines */ +#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) +#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) +#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ +#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ +#define MCI_MISC_ADDR_PHYS 2 /* physical address */ +#define MCI_MISC_ADDR_MEM 3 /* memory address */ +#define MCI_MISC_ADDR_GENERIC 7 /* generic */ + +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN BIT_ULL(30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL + +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */ + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +#define MCE_LOG_MIN_LEN 32U +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +/* AMD Scalable MCA */ +#define MSR_AMD64_SMCA_MC0_CTL 0xc0002000 +#define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001 +#define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002 +#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 +#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 +#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 +#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 +#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 +#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) + +#define XEC(x, mask) (((x) >> 16) & mask) + +/* mce.kflags flag bits for logging etc. */ +#define MCE_HANDLED_CEC BIT_ULL(0) +#define MCE_HANDLED_UC BIT_ULL(1) +#define MCE_HANDLED_EXTLOG BIT_ULL(2) +#define MCE_HANDLED_NFIT BIT_ULL(3) +#define MCE_HANDLED_EDAC BIT_ULL(4) +#define MCE_HANDLED_MCELOG BIT_ULL(5) + +/* + * Indicates an MCE which has happened in kernel space but from + * which the kernel can recover simply by executing fixup_exception() + * so that an error is returned to the caller of the function that + * hit the machine check. + */ +#define MCE_IN_KERNEL_RECOV BIT_ULL(6) + +/* + * Indicates an MCE that happened in kernel space while copying data + * from user. In this case fixup_exception() gets the kernel to the + * error exit for the copy function. Machine check handler can then + * treat it like a fault taken in user mode. + */ +#define MCE_IN_KERNEL_COPYIN BIT_ULL(7) + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ +struct mce_log_buffer { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = elements in .mce_entry[] */ + unsigned next; + unsigned flags; + unsigned recordlen; /* length of struct mce */ + struct mce entry[]; +}; + +/* Highest last */ +enum mce_notifier_prios { + MCE_PRIO_LOWEST, + MCE_PRIO_MCELOG, + MCE_PRIO_EDAC, + MCE_PRIO_NFIT, + MCE_PRIO_EXTLOG, + MCE_PRIO_UC, + MCE_PRIO_EARLY, + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC +}; + +struct notifier_block; +extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_unregister_decode_chain(struct notifier_block *nb); + +#include +#include + +extern int mce_p5_enabled; + +#ifdef CONFIG_ARCH_HAS_COPY_MC +extern void enable_copy_mc_fragile(void); +unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); +#else +static inline void enable_copy_mc_fragile(void) +{ +} +#endif + +struct cper_ia_proc_ctx; + +#ifdef CONFIG_X86_MCE +int mcheck_init(void); +void mcheck_cpu_init(struct cpuinfo_x86 *c); +void mcheck_cpu_clear(struct cpuinfo_x86 *c); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int mcheck_init(void) { return 0; } +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) { return -EINVAL; } +#endif + +void mce_setup(struct mce *m); +void mce_log(struct mce *m); +DECLARE_PER_CPU(struct device *, mce_device); + +/* Maximum number of MCA banks per CPU. */ +#define MAX_NR_BANKS 64 + +#ifdef CONFIG_X86_MCE_INTEL +void mce_intel_feature_init(struct cpuinfo_x86 *c); +void mce_intel_feature_clear(struct cpuinfo_x86 *c); +void cmci_clear(void); +void cmci_reenable(void); +void cmci_rediscover(void); +void cmci_recheck(void); +#else +static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } +static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } +static inline void cmci_clear(void) {} +static inline void cmci_reenable(void) {} +static inline void cmci_rediscover(void) {} +static inline void cmci_recheck(void) {} +#endif + +int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); +int mce_usable_address(struct mce *m); + +DECLARE_PER_CPU(unsigned, mce_exception_count); +DECLARE_PER_CPU(unsigned, mce_poll_count); + +typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); +DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); + +enum mcp_flags { + MCP_TIMESTAMP = BIT(0), /* log time stamp */ + MCP_UC = BIT(1), /* log uncorrected errors */ + MCP_DONTLOG = BIT(2), /* only clear, don't log */ + MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ +}; +bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); + +int mce_notify_irq(void); + +DECLARE_PER_CPU(struct mce, injectm); + +/* Disable CMCI/polling for MCA bank claimed by firmware */ +extern void mce_disable_bank(int bank); + +/* + * Exception handler + */ +void do_machine_check(struct pt_regs *pt_regs); + +/* + * Threshold handler + */ +extern void (*mce_threshold_vector)(void); + +/* Deferred error interrupt handler */ +extern void (*deferred_error_int_vector)(void); + +/* + * Used by APEI to report memory error via /dev/mcelog + */ + +struct cper_sec_mem_err; +extern void apei_mce_report_mem_error(int corrected, + struct cper_sec_mem_err *mem_err); + +/* + * Enumerate new IP types and HWID values in AMD processors which support + * Scalable MCA. + */ +#ifdef CONFIG_X86_MCE_AMD + +/* These may be used by multiple smca_hwid_mcatypes */ +enum smca_bank_types { + SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ + SMCA_RESERVED, /* Reserved */ + SMCA_EX, /* Execution Unit */ + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ + SMCA_CS_V2, + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_PSP_V2, + SMCA_SMU, /* System Management Unit */ + SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_MPDMA, /* MPDMA Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_NBIF, /* NBIF Unit */ + SMCA_SHUB, /* System HUB Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ + SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ + SMCA_GMI_PHY, /* GMI PHY Unit */ + N_SMCA_BANK_TYPES +}; + +extern const char *smca_get_long_name(enum smca_bank_types t); +extern bool amd_mce_is_memory_error(struct mce *m); + +extern int mce_threshold_create_device(unsigned int cpu); +extern int mce_threshold_remove_device(unsigned int cpu); + +void mce_amd_feature_init(struct cpuinfo_x86 *c); +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); +#else + +static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; +static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +#endif + +static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } +#endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h new file mode 100644 index 000000000..c91326593 --- /dev/null +++ b/arch/x86/include/asm/mem_encrypt.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#ifndef __X86_MEM_ENCRYPT_H__ +#define __X86_MEM_ENCRYPT_H__ + +#ifndef __ASSEMBLY__ + +#include +#include + +#include + +#ifdef CONFIG_X86_MEM_ENCRYPT +void __init mem_encrypt_init(void); +#else +static inline void mem_encrypt_init(void) { } +#endif + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u64 sme_me_mask; +extern u64 sev_status; + +void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, + unsigned long decrypted_kernel_vaddr, + unsigned long kernel_len, + unsigned long encryption_wa, + unsigned long encryption_pgd); + +void __init sme_early_encrypt(resource_size_t paddr, + unsigned long size); +void __init sme_early_decrypt(resource_size_t paddr, + unsigned long size); + +void __init sme_map_bootdata(char *real_mode_data); +void __init sme_unmap_bootdata(char *real_mode_data); + +void __init sme_early_init(void); +void __init sev_setup_arch(void); + +void __init sme_encrypt_kernel(struct boot_params *bp); +void __init sme_enable(struct boot_params *bp); + +int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); +int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, + unsigned long size, bool enc); + +void __init mem_encrypt_free_decrypted_mem(void); + +void __init sev_es_init_vc_handling(void); + +#define __bss_decrypted __section(".bss..decrypted") + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define sme_me_mask 0ULL + +static inline void __init sme_early_encrypt(resource_size_t paddr, + unsigned long size) { } +static inline void __init sme_early_decrypt(resource_size_t paddr, + unsigned long size) { } + +static inline void __init sme_map_bootdata(char *real_mode_data) { } +static inline void __init sme_unmap_bootdata(char *real_mode_data) { } + +static inline void __init sme_early_init(void) { } +static inline void __init sev_setup_arch(void) { } + +static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } +static inline void __init sme_enable(struct boot_params *bp) { } + +static inline void sev_es_init_vc_handling(void) { } + +static inline int __init +early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline int __init +early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline void __init +early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {} + +static inline void mem_encrypt_free_decrypted_mem(void) { } + +#define __bss_decrypted + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +void add_encrypt_protection_map(void); + +/* + * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when + * writing to or comparing values from the cr3 register. Having the + * encryption mask set in cr3 enables the PGD entry to be encrypted and + * avoid special case handling of PGD allocations. + */ +#define __sme_pa(x) (__pa(x) | sme_me_mask) +#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) + +extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + +static inline u64 sme_get_me_mask(void) +{ + return sme_me_mask; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000..9ca760e43 --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include +#include + +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +bool x86_has_pat_wp(void); +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h new file mode 100644 index 000000000..19a0b4005 --- /dev/null +++ b/arch/x86/include/asm/microcode.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MICROCODE_H +#define _ASM_X86_MICROCODE_H + +#include +#include +#include +#include + +struct ucode_patch { + struct list_head plist; + void *data; /* Intel uses only this one */ + unsigned int size; + u32 patch_id; + u16 equiv_cpu; +}; + +extern struct list_head microcode_cache; + +struct cpu_signature { + unsigned int sig; + unsigned int pf; + unsigned int rev; +}; + +struct device; + +enum ucode_state { + UCODE_OK = 0, + UCODE_NEW, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; + +struct microcode_ops { + enum ucode_state (*request_microcode_fw) (int cpu, struct device *, + bool refresh_fw); + + void (*microcode_fini_cpu) (int cpu); + + /* + * The generic 'microcode_core' part guarantees that + * the callbacks below run on a target cpu when they + * are being called. + * See also the "Synchronization" section in microcode_core.c. + */ + enum ucode_state (*apply_microcode) (int cpu); + int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); +}; + +struct ucode_cpu_info { + struct cpu_signature cpu_sig; + int valid; + void *mc; +}; +extern struct ucode_cpu_info ucode_cpu_info[]; +struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); + +#ifdef CONFIG_MICROCODE_INTEL +extern struct microcode_ops * __init init_intel_microcode(void); +#else +static inline struct microcode_ops * __init init_intel_microcode(void) +{ + return NULL; +} +#endif /* CONFIG_MICROCODE_INTEL */ + +#ifdef CONFIG_MICROCODE_AMD +extern struct microcode_ops * __init init_amd_microcode(void); +extern void __exit exit_amd_microcode(void); +#else +static inline struct microcode_ops * __init init_amd_microcode(void) +{ + return NULL; +} +static inline void __exit exit_amd_microcode(void) {} +#endif + +#define MAX_UCODE_COUNT 128 + +#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) +#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') +#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') +#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') +#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') +#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') +#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') + +#define CPUID_IS(a, b, c, ebx, ecx, edx) \ + (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) + +/* + * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. + * x86_cpuid_vendor() gets vendor id for BSP. + * + * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify + * coding, we still use x86_cpuid_vendor() to get vendor id for AP. + * + * x86_cpuid_vendor() gets vendor information directly from CPUID. + */ +static inline int x86_cpuid_vendor(void) +{ + u32 eax = 0x00000000; + u32 ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) + return X86_VENDOR_INTEL; + + if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) + return X86_VENDOR_AMD; + + return X86_VENDOR_UNKNOWN; +} + +static inline unsigned int x86_cpuid_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + return x86_family(eax); +} + +#ifdef CONFIG_MICROCODE +extern void __init load_ucode_bsp(void); +extern void load_ucode_ap(void); +void reload_early_microcode(unsigned int cpu); +extern bool initrd_gone; +void microcode_bsp_resume(void); +#else +static inline void __init load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline void reload_early_microcode(unsigned int cpu) { } +static inline void microcode_bsp_resume(void) { } +#endif + +#endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h new file mode 100644 index 000000000..9675c621c --- /dev/null +++ b/arch/x86/include/asm/microcode_amd.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MICROCODE_AMD_H +#define _ASM_X86_MICROCODE_AMD_H + +#include + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define SECTION_HDR_SIZE 8 +#define CONTAINER_HDR_SZ 12 + +struct equiv_cpu_entry { + u32 installed_cpu; + u32 fixed_errata_mask; + u32 fixed_errata_compare; + u16 equiv_cpu; + u16 res; +} __attribute__((packed)); + +struct microcode_header_amd { + u32 data_code; + u32 patch_id; + u16 mc_patch_data_id; + u8 mc_patch_data_len; + u8 init_flag; + u32 mc_patch_data_checksum; + u32 nb_dev_id; + u32 sb_dev_id; + u16 processor_rev_id; + u8 nb_rev_id; + u8 sb_rev_id; + u8 bios_api_rev; + u8 reserved1[3]; + u32 match_reg[8]; +} __attribute__((packed)); + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[]; +}; + +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) + +#ifdef CONFIG_MICROCODE_AMD +extern void __init load_ucode_amd_bsp(unsigned int family); +extern void load_ucode_amd_ap(unsigned int family); +extern int __init save_microcode_in_initrd_amd(unsigned int family); +void reload_ucode_amd(unsigned int cpu); +extern void amd_check_microcode(void); +#else +static inline void __init load_ucode_amd_bsp(unsigned int family) {} +static inline void load_ucode_amd_ap(unsigned int family) {} +static inline int __init +save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } +static inline void reload_ucode_amd(unsigned int cpu) {} +static inline void amd_check_microcode(void) {} +#endif +#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h new file mode 100644 index 000000000..4c92cea7e --- /dev/null +++ b/arch/x86/include/asm/microcode_intel.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MICROCODE_INTEL_H +#define _ASM_X86_MICROCODE_INTEL_H + +#include + +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[]; +}; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[]; +}; + +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) + +#define get_totalsize(mc) \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + +#define get_datasize(mc) \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +static inline u32 intel_get_microcode_revision(void) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; +} + +#ifdef CONFIG_MICROCODE_INTEL +extern void __init load_ucode_intel_bsp(void); +extern void load_ucode_intel_ap(void); +extern void show_ucode_info_early(void); +extern int __init save_microcode_in_initrd_intel(void); +void reload_ucode_intel(void); +#else +static inline __init void load_ucode_intel_bsp(void) {} +static inline void load_ucode_intel_ap(void) {} +static inline void show_ucode_info_early(void) {} +static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } +static inline void reload_ucode_intel(void) {} +#endif + +#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/misc.h b/arch/x86/include/asm/misc.h new file mode 100644 index 000000000..bb049cca3 --- /dev/null +++ b/arch/x86/include/asm/misc.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MISC_H +#define _ASM_X86_MISC_H + +int num_digits(int val); + +#endif /* _ASM_X86_MISC_H */ diff --git a/arch/x86/include/asm/mmconfig.h b/arch/x86/include/asm/mmconfig.h new file mode 100644 index 000000000..976486447 --- /dev/null +++ b/arch/x86/include/asm/mmconfig.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MMCONFIG_H +#define _ASM_X86_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void fam10h_check_enable_mmcfg(void); +extern void check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif /* _ASM_X86_MMCONFIG_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h new file mode 100644 index 000000000..5d7494631 --- /dev/null +++ b/arch/x86/include/asm/mmu.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MMU_H +#define _ASM_X86_MMU_H + +#include +#include +#include +#include +#include + +/* Uprobes on this MM assume 32-bit code */ +#define MM_CONTEXT_UPROBE_IA32 BIT(0) +/* vsyscall page is accessible on this MM */ +#define MM_CONTEXT_HAS_VSYSCALL BIT(1) + +/* + * x86 has arch-specific MMU state beyond what lives in mm_struct. + */ +typedef struct { + /* + * ctx_id uniquely identifies this mm_struct. A ctx_id will never + * be reused, and zero is not a valid ctx_id. + */ + u64 ctx_id; + + /* + * Any code that needs to do any sort of TLB flushing for this + * mm will first make its changes to the page tables, then + * increment tlb_gen, then flush. This lets the low-level + * flushing code keep track of what needs flushing. + * + * This is not used on Xen PV. + */ + atomic64_t tlb_gen; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; +#endif + +#ifdef CONFIG_X86_64 + unsigned short flags; +#endif + + struct mutex lock; + void __user *vdso; /* vdso base address */ + const struct vdso_image *vdso_image; /* vdso image in use */ + + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* + * One bit per protection key says whether userspace can + * use it or not. protected by mmap_lock. + */ + u16 pkey_allocation_map; + s16 execute_only_pkey; +#endif +} mm_context_t; + +#define INIT_MM_CONTEXT(mm) \ + .context = { \ + .ctx_id = 1, \ + .lock = __MUTEX_INITIALIZER(mm.context.lock), \ + } + +void leave_mm(int cpu); +#define leave_mm leave_mm + +#endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h new file mode 100644 index 000000000..b8d40ddea --- /dev/null +++ b/arch/x86/include/asm/mmu_context.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MMU_CONTEXT_H +#define _ASM_X86_MMU_CONTEXT_H + +#include +#include +#include +#include + +#include + +#include +#include +#include + +extern atomic64_t last_mm_ctx_id; + +#ifndef CONFIG_PARAVIRT_XXL +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT_XXL */ + +#ifdef CONFIG_PERF_EVENTS +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); +DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); +void cr4_update_pce(void *ignored); +#endif + +#ifdef CONFIG_MODIFY_LDT_SYSCALL +/* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + unsigned int nr_entries; + + /* + * If PTI is in use, then the entries array is not mapped while we're + * in user mode. The whole array will be aliased at the addressed + * given by ldt_slot_va(slot). We use two slots so that we can allocate + * and map, and enable a new LDT without invalidating the mapping + * of an older, still-in-use LDT. + * + * slot will be -1 if this LDT doesn't have an alias mapping. + */ + int slot; +}; + +/* + * Used for LDT copy/destruction. + */ +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); +void destroy_context_ldt(struct mm_struct *mm); +void ldt_arch_exit_mmap(struct mm_struct *mm); +#else /* CONFIG_MODIFY_LDT_SYSCALL */ +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + return 0; +} +static inline void destroy_context_ldt(struct mm_struct *mm) { } +static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } +#endif + +#ifdef CONFIG_MODIFY_LDT_SYSCALL +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); +#else +static inline void load_mm_ldt(struct mm_struct *mm) +{ + clear_LDT(); +} +static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); +} +#endif + +#define enter_lazy_tlb enter_lazy_tlb +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); + +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). + */ +#define init_new_context init_new_context +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + mutex_init(&mm->context.lock); + + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); + atomic64_set(&mm->context.tlb_gen, 0); + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { + /* pkey 0 is the default and allocated implicitly */ + mm->context.pkey_allocation_map = 0x1; + /* -1 means unallocated or invalid */ + mm->context.execute_only_pkey = -1; + } +#endif + init_new_context_ldt(mm); + return 0; +} + +#define destroy_context destroy_context +static inline void destroy_context(struct mm_struct *mm) +{ + destroy_context_ldt(mm); +} + +extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); + +extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); +#define switch_mm_irqs_off switch_mm_irqs_off + +#define activate_mm(prev, next) \ +do { \ + paravirt_activate_mm((prev), (next)); \ + switch_mm((prev), (next), NULL); \ +} while (0); + +#ifdef CONFIG_X86_32 +#define deactivate_mm(tsk, mm) \ +do { \ + loadsegment(gs, 0); \ +} while (0) +#else +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + loadsegment(fs, 0); \ +} while (0) +#endif + +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + arch_dup_pkeys(oldmm, mm); + paravirt_arch_dup_mmap(oldmm, mm); + return ldt_dup_context(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_arch_exit_mmap(mm); + ldt_arch_exit_mmap(mm); +} + +#ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !IS_ENABLED(CONFIG_IA32_EMULATION) || + !(mm->context.flags & MM_CONTEXT_UPROBE_IA32); +} +#else +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return false; +} +#endif + +static inline void arch_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ +} + +/* + * We only want to enforce protection keys on the current process + * because we effectively have no access to PKRU for other + * processes or any way to tell *which * PKRU in a threaded + * process we could use. + * + * So do not enforce things if the VMA is not from the current + * mm, or if we are in a kernel thread. + */ +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + /* pkeys never affect instruction fetches */ + if (execute) + return true; + /* allow access if the VMA is not one from this process */ + if (foreign || vma_is_foreign(vma)) + return true; + return __pkru_allows_pkey(vma_pkey(vma), write); +} + +unsigned long __get_current_cr3_fast(void); + +#include + +#endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h new file mode 100644 index 000000000..c41b41edd --- /dev/null +++ b/arch/x86/include/asm/mmzone.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h new file mode 100644 index 000000000..2d4515e8b --- /dev/null +++ b/arch/x86/include/asm/mmzone_32.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002 + * + */ + +#ifndef _ASM_X86_MMZONE_32_H +#define _ASM_X86_MMZONE_32_H + +#include + +#ifdef CONFIG_NUMA +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h new file mode 100644 index 000000000..0c585046f --- /dev/null +++ b/arch/x86/include/asm/mmzone_64.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* K8 NUMA support */ +/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ +/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */ +#ifndef _ASM_X86_MMZONE_64_H +#define _ASM_X86_MMZONE_64_H + +#ifdef CONFIG_NUMA + +#include +#include + +extern struct pglist_data *node_data[]; + +#define NODE_DATA(nid) (node_data[nid]) + +#endif +#endif /* _ASM_X86_MMZONE_64_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h new file mode 100644 index 000000000..e988bac0a --- /dev/null +++ b/arch/x86/include/asm/module.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MODULE_H +#define _ASM_X86_MODULE_H + +#include +#include + +struct mod_arch_specific { +#ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif +}; + +#endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h new file mode 100644 index 000000000..e90ac7e9a --- /dev/null +++ b/arch/x86/include/asm/mpspec.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MPSPEC_H +#define _ASM_X86_MPSPEC_H + + +#include +#include +#include + +extern int pic_mode; + +#ifdef CONFIG_X86_32 + +/* + * Summit or generic (i.e. installer) kernels need lots of bus entries. + * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. + */ +#if CONFIG_BASE_SMALL == 0 +# define MAX_MP_BUSSES 260 +#else +# define MAX_MP_BUSSES 32 +#endif + +#define MAX_IRQ_SOURCES 256 + +extern unsigned int def_to_bigsmp; + +#else /* CONFIG_X86_64: */ + +#define MAX_MP_BUSSES 256 +/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_EISA +extern int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +extern unsigned int boot_cpu_physical_apicid; +extern u8 boot_cpu_apic_version; +extern unsigned long mp_lapic_addr; + +#ifdef CONFIG_X86_LOCAL_APIC +extern int smp_found_config; +#else +# define smp_found_config 0 +#endif + +static inline void get_smp_config(void) +{ + x86_init.mpparse.get_smp_config(0); +} + +static inline void early_get_smp_config(void) +{ + x86_init.mpparse.get_smp_config(1); +} + +static inline void find_smp_config(void) +{ + x86_init.mpparse.find_smp_config(); +} + +#ifdef CONFIG_X86_MPPARSE +extern void e820__memblock_alloc_reserved_mpc_new(void); +extern int enable_update_mptable; +extern void default_find_smp_config(void); +extern void default_get_smp_config(unsigned int early); +#else +static inline void e820__memblock_alloc_reserved_mpc_new(void) { } +#define enable_update_mptable 0 +#define default_find_smp_config x86_init_noop +#define default_get_smp_config x86_init_uint_noop +#endif + +int generic_processor_info(int apicid, int version); + +#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) + +struct physid_mask { + unsigned long mask[PHYSID_ARRAY_SIZE]; +}; + +typedef struct physid_mask physid_mask_t; + +#define physid_set(physid, map) set_bit(physid, (map).mask) +#define physid_clear(physid, map) clear_bit(physid, (map).mask) +#define physid_isset(physid, map) test_bit(physid, (map).mask) +#define physid_test_and_set(physid, map) \ + test_and_set_bit(physid, (map).mask) + +#define physids_and(dst, src1, src2) \ + bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) + +#define physids_or(dst, src1, src2) \ + bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) + +#define physids_clear(map) \ + bitmap_zero((map).mask, MAX_LOCAL_APIC) + +#define physids_complement(dst, src) \ + bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) + +#define physids_empty(map) \ + bitmap_empty((map).mask, MAX_LOCAL_APIC) + +#define physids_equal(map1, map2) \ + bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) + +#define physids_weight(map) \ + bitmap_weight((map).mask, MAX_LOCAL_APIC) + +#define physids_shift_right(d, s, n) \ + bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) + +#define physids_shift_left(d, s, n) \ + bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) + +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} + +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} + +static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +{ + physids_clear(*map); + physid_set(physid, *map); +} + +#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } +#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } + +extern physid_mask_t phys_cpu_present_map; + +#endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h new file mode 100644 index 000000000..6fb923a34 --- /dev/null +++ b/arch/x86/include/asm/mpspec_def.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MPSPEC_DEF_H +#define _ASM_X86_MPSPEC_DEF_H + +/* + * Structure definitions for SMP machines following the + * Intel Multiprocessing Specification 1.1 and 1.4. + */ + +/* + * This tag identifies where the SMP configuration + * information is. + */ + +#define SMP_MAGIC_IDENT (('_'<<24) | ('P'<<16) | ('M'<<8) | '_') + +#ifdef CONFIG_X86_32 +# define MAX_MPC_ENTRY 1024 +#endif + +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ +}; + +#define MPC_SIGNATURE "PCMP" + +struct mpc_table { + char signature[4]; + unsigned short length; /* Size of table */ + char spec; /* 0x01 */ + char checksum; + char oem[8]; + char productid[12]; + unsigned int oemptr; /* 0 if not present */ + unsigned short oemsize; /* 0 if not present */ + unsigned short oemcount; + unsigned int lapic; /* APIC address */ + unsigned int reserved; +}; + +/* Followed by entries */ + +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +/* Used by IBM NUMA-Q to describe node locality */ +#define MP_TRANSLATION 192 + +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the boot CPU */ + +#define CPU_STEPPING_MASK 0x000F +#define CPU_MODEL_MASK 0x00F0 +#define CPU_FAMILY_MASK 0x0F00 + +struct mpc_cpu { + unsigned char type; + unsigned char apicid; /* Local APIC number */ + unsigned char apicver; /* Its versions */ + unsigned char cpuflag; + unsigned int cpufeature; + unsigned int featureflag; /* CPUID feature value */ + unsigned int reserved[2]; +}; + +struct mpc_bus { + unsigned char type; + unsigned char busid; + unsigned char bustype[6]; +}; + +/* List of Bus Type string values, Intel MP Spec. */ +#define BUSTYPE_EISA "EISA" +#define BUSTYPE_ISA "ISA" +#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ +#define BUSTYPE_MCA "MCA" /* Obsolete */ +#define BUSTYPE_VL "VL" /* Local bus */ +#define BUSTYPE_PCI "PCI" +#define BUSTYPE_PCMCIA "PCMCIA" +#define BUSTYPE_CBUS "CBUS" +#define BUSTYPE_CBUSII "CBUSII" +#define BUSTYPE_FUTURE "FUTURE" +#define BUSTYPE_MBI "MBI" +#define BUSTYPE_MBII "MBII" +#define BUSTYPE_MPI "MPI" +#define BUSTYPE_MPSA "MPSA" +#define BUSTYPE_NUBUS "NUBUS" +#define BUSTYPE_TC "TC" +#define BUSTYPE_VME "VME" +#define BUSTYPE_XPRESS "XPRESS" + +#define MPC_APIC_USABLE 0x01 + +struct mpc_ioapic { + unsigned char type; + unsigned char apicid; + unsigned char apicver; + unsigned char flags; + unsigned int apicaddr; +}; + +struct mpc_intsrc { + unsigned char type; + unsigned char irqtype; + unsigned short irqflag; + unsigned char srcbus; + unsigned char srcbusirq; + unsigned char dstapic; + unsigned char dstirq; +}; + +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; + +#define MP_IRQPOL_DEFAULT 0x0 +#define MP_IRQPOL_ACTIVE_HIGH 0x1 +#define MP_IRQPOL_RESERVED 0x2 +#define MP_IRQPOL_ACTIVE_LOW 0x3 +#define MP_IRQPOL_MASK 0x3 + +#define MP_IRQTRIG_DEFAULT 0x0 +#define MP_IRQTRIG_EDGE 0x4 +#define MP_IRQTRIG_RESERVED 0x8 +#define MP_IRQTRIG_LEVEL 0xc +#define MP_IRQTRIG_MASK 0xc + +#define MP_APIC_ALL 0xFF + +struct mpc_lintsrc { + unsigned char type; + unsigned char irqtype; + unsigned short irqflag; + unsigned char srcbusid; + unsigned char srcbusirq; + unsigned char destapic; + unsigned char destapiclint; +}; + +#define MPC_OEM_SIGNATURE "_OEM" + +struct mpc_oemtable { + char signature[4]; + unsigned short length; /* Size of table */ + char rev; /* 0x01 */ + char checksum; + char mpc[8]; +}; + +/* + * Default configurations + * + * 1 2 CPU ISA 82489DX + * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining + * 3 2 CPU EISA 82489DX + * 4 2 CPU MCA 82489DX + * 5 2 CPU ISA+PCI + * 6 2 CPU EISA+PCI + * 7 2 CPU MCA+PCI + */ + +enum mp_bustype { + MP_BUS_ISA = 1, + MP_BUS_EISA, + MP_BUS_PCI, +}; +#endif /* _ASM_X86_MPSPEC_DEF_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000..61f0c206b --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSHYPER_H +#define _ASM_X86_MSHYPER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +union hv_ghcb; + +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); + +typedef int (*hyperv_fill_flush_list_func)( + struct hv_guest_mapping_flush_list *flush, + void *data); + +#define hv_get_raw_timer() rdtsc_ordered() + +void hyperv_vector_handler(struct pt_regs *regs); + +#if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + +extern void *hv_hypercall_pg; + +extern u64 hv_current_partition_id; + +extern union hv_ghcb * __percpu *hv_ghcb_pg; + +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); +int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); + +static inline u64 hv_do_hypercall(u64 control, void *input, void *output) +{ + u64 input_address = input ? virt_to_phys(input) : 0; + u64 output_address = output ? virt_to_phys(output) : 0; + u64 hv_status; + +#ifdef CONFIG_X86_64 + if (!hv_hypercall_pg) + return U64_MAX; + + __asm__ __volatile__("mov %4, %%r8\n" + CALL_NOSPEC + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input_address) + : "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "memory", "r8", "r9", "r10", "r11"); +#else + u32 input_address_hi = upper_32_bits(input_address); + u32 input_address_lo = lower_32_bits(input_address); + u32 output_address_hi = upper_32_bits(output_address); + u32 output_address_lo = lower_32_bits(output_address); + + if (!hv_hypercall_pg) + return U64_MAX; + + __asm__ __volatile__(CALL_NOSPEC + : "=A" (hv_status), + "+c" (input_address_lo), ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input_address_hi), + "D"(output_address_hi), "S"(output_address_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "memory"); +#endif /* !x86_64 */ + return hv_status; +} + +/* Fast hypercall with 8 bytes of input and no output */ +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +{ + u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + +#ifdef CONFIG_X86_64 + { + __asm__ __volatile__(CALL_NOSPEC + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : THUNK_TARGET(hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); + } +#else + { + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "edi", "esi"); + } +#endif + return hv_status; +} + +/* Fast hypercall with 16 bytes of input */ +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + +#ifdef CONFIG_X86_64 + { + __asm__ __volatile__("mov %4, %%r8\n" + CALL_NOSPEC + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "r" (input2), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); + } +#else + { + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); + } +#endif + return hv_status; +} + +extern struct hv_vp_assist_page **hv_vp_assist_page; + +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + if (!hv_vp_assist_page) + return NULL; + + return hv_vp_assist_page[cpu]; +} + +void __init hyperv_init(void); +void hyperv_setup_mmu_ops(void); +void set_hv_tscchange_cb(void (*cb)(void)); +void clear_hv_tscchange_cb(void); +void hyperv_stop_tsc_emulation(void); +int hyperv_flush_guest_mapping(u64 as); +int hyperv_flush_guest_mapping_range(u64 as, + hyperv_fill_flush_list_func fill_func, void *data); +int hyperv_fill_flush_guest_mapping_list( + struct hv_guest_mapping_flush_list *flush, + u64 start_gfn, u64 end_gfn); + +#ifdef CONFIG_X86_64 +void hv_apic_init(void); +void __init hv_init_spinlocks(void); +bool hv_vcpu_is_preempted(int vcpu); +#else +static inline void hv_apic_init(void) {} +#endif + +struct irq_domain *hv_create_pci_msi_domain(void); + +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, + struct hv_interrupt_entry *entry); +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); +int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); + +#ifdef CONFIG_AMD_MEM_ENCRYPT +void hv_ghcb_msr_write(u64 msr, u64 value); +void hv_ghcb_msr_read(u64 msr, u64 *value); +bool hv_ghcb_negotiate_protocol(void); +void hv_ghcb_terminate(unsigned int set, unsigned int reason); +#else +static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} +static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +static inline bool hv_ghcb_negotiate_protocol(void) { return false; } +static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +#endif + +extern bool hv_isolation_type_snp(void); + +static inline bool hv_is_synic_reg(unsigned int reg) +{ + if ((reg >= HV_REGISTER_SCONTROL) && + (reg <= HV_REGISTER_SINT15)) + return true; + return false; +} + +static inline u64 hv_get_register(unsigned int reg) +{ + u64 value; + + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) + hv_ghcb_msr_read(reg, &value); + else + rdmsrl(reg, value); + return value; +} + +static inline void hv_set_register(unsigned int reg, u64 value) +{ + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { + hv_ghcb_msr_write(reg, value); + + /* Write proxy bit via wrmsl instruction */ + if (reg >= HV_REGISTER_SINT0 && + reg <= HV_REGISTER_SINT15) + wrmsrl(reg, value | 1 << 20); + } else { + wrmsrl(reg, value); + } +} + +#else /* CONFIG_HYPERV */ +static inline void hyperv_init(void) {} +static inline void hyperv_setup_mmu_ops(void) {} +static inline void set_hv_tscchange_cb(void (*cb)(void)) {} +static inline void clear_hv_tscchange_cb(void) {} +static inline void hyperv_stop_tsc_emulation(void) {}; +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + return NULL; +} +static inline int hyperv_flush_guest_mapping(u64 as) { return -1; } +static inline int hyperv_flush_guest_mapping_range(u64 as, + hyperv_fill_flush_list_func fill_func, void *data) +{ + return -1; +} +static inline void hv_set_register(unsigned int reg, u64 value) { } +static inline u64 hv_get_register(unsigned int reg) { return 0; } +static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, + bool visible) +{ + return -1; +} +#endif /* CONFIG_HYPERV */ + + +#include + +#endif diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h new file mode 100644 index 000000000..d71c7e8b7 --- /dev/null +++ b/arch/x86/include/asm/msi.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSI_H +#define _ASM_X86_MSI_H +#include +#include + +typedef struct irq_alloc_info msi_alloc_info_t; + +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg); + +/* Structs and defines for the X86 specific MSI message format */ + +typedef struct x86_msi_data { + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#define arch_msi_msg_data x86_msi_data + +typedef struct x86_msi_addr_lo { + union { + struct { + u32 reserved_0 : 2, + dest_mode_logical : 1, + redirect_hint : 1, + reserved_1 : 1, + virt_destid_8_14 : 7, + destid_0_7 : 8, + base_address : 12; + }; + struct { + u32 dmar_reserved_0 : 2, + dmar_index_15 : 1, + dmar_subhandle_valid : 1, + dmar_format : 1, + dmar_index_0_14 : 15, + dmar_base_address : 12; + }; + }; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#define arch_msi_msg_addr_lo x86_msi_addr_lo + +#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20) + +typedef struct x86_msi_addr_hi { + u32 reserved : 8, + destid_8_31 : 24; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#define arch_msi_msg_addr_hi x86_msi_addr_hi + +#define X86_MSI_BASE_ADDRESS_HIGH (0) + +struct msi_msg; +u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); + +#endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h new file mode 100644 index 000000000..ec955ab2f --- /dev/null +++ b/arch/x86/include/asm/msr-index.h @@ -0,0 +1,1120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +#include + +/* + * CPU model specific register (MSR) numbers. + * + * Do not add new entries to this file unless the definitions are shared + * between multiple compilation units. + */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) +#define EFER_FFXSR (1<<_EFER_FFXSR) + +/* Intel MSRs. Some also available on other CPUs */ + +#define MSR_TEST_CTRL 0x00000033 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT) + +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + +/* A mask for bits which the kernel toggles when controlling mitigations */ +#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ + | SPEC_CTRL_RRSBA_DIS_S) + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + +/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ +#define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). + */ + +#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* + * IA32_XAPIC_DISABLE_STATUS MSR + * supported + */ + +#define MSR_IA32_FLUSH_CMD 0x0000010b +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ + +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + +/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ +#define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) + +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60) +#define LBR_INFO_CYCLES 0xffff +#define LBR_INFO_BR_TYPE_OFFSET 56 +#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) + +#define MSR_ARCH_LBR_CTL 0x000014ce +#define ARCH_LBR_CTL_LBREN BIT(0) +#define ARCH_LBR_CTL_CPL_OFFSET 1 +#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET) +#define ARCH_LBR_CTL_STACK_OFFSET 3 +#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET) +#define ARCH_LBR_CTL_FILTER_OFFSET 16 +#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET) +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 +#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define PERF_CAP_PEBS_TRAP BIT_ULL(6) +#define PERF_CAP_ARCH_REG BIT_ULL(7) +#define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_PEBS_BASELINE BIT_ULL(14) +#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) +#define RTIT_CTL_FABRIC_EN BIT(6) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_EVENT_EN BIT(31) +#define RTIT_CTL_NOTNT BIT_ULL(55) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) +#define RTIT_CTL_ADDR0_OFFSET 32 +#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET) +#define RTIT_CTL_ADDR1_OFFSET 36 +#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET) +#define RTIT_CTL_ADDR2_OFFSET 40 +#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET) +#define RTIT_CTL_ADDR3_OFFSET 44 +#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_FILTEREN BIT(0) +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_BUFFOVF BIT(3) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define RTIT_STATUS_BYTECNT_OFFSET 32 +#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET) +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_VR_CURRENT_CONFIG 0x00000601 +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D +#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650 + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 + +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_1 0xc0011004 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) +#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9 + +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<> 0) & 0xff) +#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) +#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) +#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) + +#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) +#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) +#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) +#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) + +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 + +/* AMD Last Branch Record MSRs */ +#define MSR_AMD64_LBR_SELECT 0xc000010e + +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 + +/* Zen 2 */ +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + +/* Fam 16h MSRs */ +#define MSR_F16H_L2I_PERF_CTL 0xc0010230 +#define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 + +/* Fam 15h MSRs */ +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b +#define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL +#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) +#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) +#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) +#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) +#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) + +#define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR +#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) +#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) +#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) +#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) +#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) + +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 +#define MSR_F15H_IC_CFG 0xc0011021 +#define MSR_F15H_EX_CFG 0xc001102c + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_HWCR_SMMLOCK_BIT 0 +#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs */ +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + +#define MSR_IA32_TSC_ADJUST 0x0000003b +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 +#define MSR_IA32_XSS 0x00000da0 + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b +#define MSR_IA32_SMBASE 0x0000009e + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 +#define INTEL_PERF_CTL_MASK 0xffff + +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + +#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6) + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_HIGH_ENABLE (1 << 0) +#define THERM_INT_LOW_ENABLE (1 << 1) +#define THERM_INT_PLN_ENABLE (1 << 24) + +#define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) +#define THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 + +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 +#define MSR_MISC_PWR_MGMT 0x000001aa + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 + +#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) + +/* Thermal Thresholds Support */ +#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) +#define THERM_SHIFT_THRESHOLD0 8 +#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) +#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) +#define THERM_SHIFT_THRESHOLD1 16 +#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) +#define THERM_STATUS_THRESHOLD0 (1 << 6) +#define THERM_LOG_THRESHOLD0 (1 << 7) +#define THERM_STATUS_THRESHOLD1 (1 << 8) +#define THERM_LOG_THRESHOLD1 (1 << 9) + +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) +#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) +#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) +#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) +#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) + +/* MISC_FEATURES_ENABLES non-architectural features */ +#define MSR_MISC_FEATURES_ENABLES 0x00000140 + +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) +#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 + +#define MSR_IA32_TSC_DEADLINE 0x000006E0 + + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +#define MSR_PERF_METRICS 0x00000329 + +/* PERF_GLOBAL_OVF_CTL bits */ +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 + +/* VMX_BASIC bits and bitmasks */ +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU +#define VMX_BASIC_MEM_TYPE_WB 6LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU + +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE 0xc0010115 +#define MSR_VM_HSAVE_PA 0xc0010117 + +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + +/* x2APIC locked status */ +#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD +#define LEGACY_XAPIC_DISABLED BIT(0) /* + * x2APIC mode is locked and + * disabling x2APIC will cause + * a #GP + */ + +#endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/msr-trace.h b/arch/x86/include/asm/msr-trace.h new file mode 100644 index 000000000..f6adbe968 --- /dev/null +++ b/arch/x86/include/asm/msr-trace.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM msr + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE msr-trace + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/ + +#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MSR_H + +#include + +/* + * Tracing for x86 model specific registers. Directly maps to the + * RDMSR/WRMSR instructions. + */ + +DECLARE_EVENT_CLASS(msr_trace_class, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed), + TP_STRUCT__entry( + __field( unsigned, msr ) + __field( u64, val ) + __field( int, failed ) + ), + TP_fast_assign( + __entry->msr = msr; + __entry->val = val; + __entry->failed = failed; + ), + TP_printk("%x, value %llx%s", + __entry->msr, + __entry->val, + __entry->failed ? " #GP" : "") +); + +DEFINE_EVENT(msr_trace_class, read_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, write_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, rdpmc, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +#endif /* _TRACE_MSR_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h new file mode 100644 index 000000000..65ec1965c --- /dev/null +++ b/arch/x86/include/asm/msr.h @@ -0,0 +1,384 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSR_H +#define _ASM_X86_MSR_H + +#include "msr-index.h" + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +struct msr_info { + u32 msr_no; + struct msr reg; + struct msr *msrs; + int err; +}; + +struct msr_regs_info { + u32 *regs; + int err; +}; + +struct saved_msr { + bool valid; + struct msr_info info; +}; + +struct saved_msrs { + unsigned int num; + struct saved_msr *array; +}; + +/* + * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" + * constraint has different meanings. For i386, "A" means exactly + * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, + * it means rax *or* rdx. + */ +#ifdef CONFIG_X86_64 +/* Using 64-bit values saves one instruction clearing the high half of low */ +#define DECLARE_ARGS(val, low, high) unsigned long low, high +#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) +#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +#define DECLARE_ARGS(val, low, high) unsigned long long val +#define EAX_EDX_VAL(val, low, high) (val) +#define EAX_EDX_RET(val, low, high) "=A" (val) +#endif + +/* + * Be very careful with includes. This header is prone to include loops. + */ +#include +#include + +#ifdef CONFIG_TRACEPOINTS +DECLARE_TRACEPOINT(read_msr); +DECLARE_TRACEPOINT(write_msr); +DECLARE_TRACEPOINT(rdpmc); +extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); +extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); +extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); +#else +static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} +static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} +static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} +#endif + +/* + * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR + * accessors and should not have any tracing or other functionality piggybacking + * on them - those are *purely* for accessing MSRs and nothing more. So don't even + * think of extending them - you will be slapped with a stinking trout or a frozen + * shark will reach you, wherever you are! You've been warned. + */ +static __always_inline unsigned long long __rdmsr(unsigned int msr) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) + : EAX_EDX_RET(val, low, high) : "c" (msr)); + + return EAX_EDX_VAL(val, low, high); +} + +static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) +{ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a"(low), "d" (high) : "memory"); +} + +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = __rdmsr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + __wrmsr(msr, low, high) + +#define native_wrmsrl(msr, val) \ + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + val = __rdmsr(msr); + + if (tracepoint_enabled(read_msr)) + do_trace_read_msr(msr, val, 0); + + return val; +} + +static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("1: rdmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) + : [err] "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr)); + if (tracepoint_enabled(read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); + return EAX_EDX_VAL(val, low, high); +} + +/* Can be uninlined because referenced by paravirt */ +static inline void notrace +native_write_msr(unsigned int msr, u32 low, u32 high) +{ + __wrmsr(msr, low, high); + + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), 0); +} + +/* Can be uninlined because referenced by paravirt */ +static inline int notrace +native_write_msr_safe(unsigned int msr, u32 low, u32 high) +{ + int err; + + asm volatile("1: wrmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) + : [err] "=a" (err) + : "c" (msr), "0" (low), "d" (high) + : "memory"); + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), err); + return err; +} + +extern int rdmsr_safe_regs(u32 regs[8]); +extern int wrmsr_safe_regs(u32 regs[8]); + +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline unsigned long long rdtsc(void) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); + + return EAX_EDX_VAL(val, low, high); +} + +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline unsigned long long rdtsc_ordered(void) +{ + DECLARE_ARGS(val, low, high); + + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + * + * Thus, use the preferred barrier on the respective CPU, aiming for + * RDTSCP as the default. + */ + asm volatile(ALTERNATIVE_2("rdtsc", + "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, + "rdtscp", X86_FEATURE_RDTSCP) + : EAX_EDX_RET(val, low, high) + /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ + :: "ecx"); + + return EAX_EDX_VAL(val, low, high); +} + +static inline unsigned long long native_read_pmc(int counter) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + if (tracepoint_enabled(rdpmc)) + do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); + return EAX_EDX_VAL(val, low, high); +} + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#include +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr, low, high) \ +do { \ + u64 __val = native_read_msr((msr)); \ + (void)((low) = (u32)__val); \ + (void)((high) = (u32)(__val >> 32)); \ +} while (0) + +static inline void wrmsr(unsigned int msr, u32 low, u32 high) +{ + native_write_msr(msr, low, high); +} + +#define rdmsrl(msr, val) \ + ((val) = native_read_msr((msr))) + +static inline void wrmsrl(unsigned int msr, u64 val) +{ + native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); +} + +/* wrmsr with exception handling */ +static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) +{ + return native_write_msr_safe(msr, low, high); +} + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr, low, high) \ +({ \ + int __err; \ + u64 __val = native_read_msr_safe((msr), &__err); \ + (*low) = (u32)__val; \ + (*high) = (u32)(__val >> 32); \ + __err; \ +}) + +static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_safe(msr, &err); + return err; +} + +#define rdpmc(counter, low, high) \ +do { \ + u64 _l = native_read_pmc((counter)); \ + (low) = (u32)_l; \ + (high) = (u32)(_l >> 32); \ +} while (0) + +#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) + +#endif /* !CONFIG_PARAVIRT_XXL */ + +/* + * 64-bit version of wrmsr_safe(): + */ +static inline int wrmsrl_safe(u32 msr, u64 val) +{ + return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); +} + +struct msr *msrs_alloc(void); +void msrs_free(struct msr *msrs); +int msr_set_bit(u32 msr, u8 bit); +int msr_clear_bit(u32 msr, u8 bit); + +#ifdef CONFIG_SMP +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); +#else /* CONFIG_SMP */ +static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + rdmsr(msr_no, *l, *h); + return 0; +} +static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + wrmsr(msr_no, l, h); + return 0; +} +static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ + rdmsrl(msr_no, *q); + return 0; +} +static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ + wrmsrl(msr_no, q); + return 0; +} +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, + struct msr *msrs) +{ + rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); +} +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, + struct msr *msrs) +{ + wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); +} +static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, + u32 *l, u32 *h) +{ + return rdmsr_safe(msr_no, l, h); +} +static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + return wrmsr_safe(msr_no, l, h); +} +static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ + return rdmsrl_safe(msr_no, q); +} +static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ + return wrmsrl_safe(msr_no, q); +} +static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return rdmsr_safe_regs(regs); +} +static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return wrmsr_safe_regs(regs); +} +#endif /* CONFIG_SMP */ +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h new file mode 100644 index 000000000..76d726074 --- /dev/null +++ b/arch/x86/include/asm/mtrr.h @@ -0,0 +1,127 @@ +/* Generic MTRR (Memory Type Range Register) ioctls. + + Copyright (C) 1997-1999 Richard Gooch + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ +#ifndef _ASM_X86_MTRR_H +#define _ASM_X86_MTRR_H + +#include + +void mtrr_bp_init(void); + +/* + * The following functions are for use by other drivers that cannot use + * arch_phys_wc_add and arch_phys_wc_del. + */ +# ifdef CONFIG_MTRR +extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); +extern void mtrr_save_fixed_ranges(void *); +extern void mtrr_save_state(void); +extern int mtrr_add(unsigned long base, unsigned long size, + unsigned int type, bool increment); +extern int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, bool increment); +extern int mtrr_del(int reg, unsigned long base, unsigned long size); +extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); +extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); +extern void mtrr_ap_init(void); +extern void set_mtrr_aps_delayed_init(void); +extern void mtrr_aps_init(void); +extern void mtrr_bp_restore(void); +extern int mtrr_trim_uncached_memory(unsigned long end_pfn); +extern int amd_special_default_mtrr(void); +# else +static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) +{ + /* + * Return no-MTRRs: + */ + return MTRR_TYPE_INVALID; +} +#define mtrr_save_fixed_ranges(arg) do {} while (0) +#define mtrr_save_state() do {} while (0) +static inline int mtrr_add(unsigned long base, unsigned long size, + unsigned int type, bool increment) +{ + return -ENODEV; +} +static inline int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, bool increment) +{ + return -ENODEV; +} +static inline int mtrr_del(int reg, unsigned long base, unsigned long size) +{ + return -ENODEV; +} +static inline int mtrr_del_page(int reg, unsigned long base, unsigned long size) +{ + return -ENODEV; +} +static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + return 0; +} +static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +{ +} +#define mtrr_ap_init() do {} while (0) +#define set_mtrr_aps_delayed_init() do {} while (0) +#define mtrr_aps_init() do {} while (0) +#define mtrr_bp_restore() do {} while (0) +# endif + +#ifdef CONFIG_COMPAT +#include + +struct mtrr_sentry32 { + compat_ulong_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +struct mtrr_gentry32 { + compat_ulong_t regnum; /* Register number */ + compat_uint_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +#define MTRR_IOCTL_BASE 'M' + +#define MTRRIOC32_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry32) +#define MTRRIOC32_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry32) +#define MTRRIOC32_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry32) +#define MTRRIOC32_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32) +#define MTRRIOC32_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry32) +#define MTRRIOC32_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry32) +#define MTRRIOC32_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry32) +#define MTRRIOC32_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry32) +#define MTRRIOC32_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32) +#define MTRRIOC32_KILL_PAGE_ENTRY \ + _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) +#endif /* CONFIG_COMPAT */ + +/* Bit fields for enabled in struct mtrr_state_type */ +#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 +#define MTRR_STATE_MTRR_ENABLED 0x02 + +#endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h new file mode 100644 index 000000000..3a8fdf881 --- /dev/null +++ b/arch/x86/include/asm/mwait.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MWAIT_H +#define _ASM_X86_MWAIT_H + +#include +#include + +#include +#include + +#define MWAIT_SUBSTATE_MASK 0xf +#define MWAIT_CSTATE_MASK 0xf +#define MWAIT_SUBSTATE_SIZE 4 +#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) +#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) +#define MWAIT_C1_SUBSTATE_MASK 0xf0 + +#define CPUID_MWAIT_LEAF 5 +#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 +#define CPUID5_ECX_INTERRUPT_BREAK 0x2 + +#define MWAIT_ECX_INTERRUPT_BREAK 0x1 +#define MWAITX_ECX_TIMER_ENABLE BIT(1) +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX +#define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 + +static inline void __monitor(const void *eax, unsigned long ecx, + unsigned long edx) +{ + /* "monitor %eax, %ecx, %edx;" */ + asm volatile(".byte 0x0f, 0x01, 0xc8;" + :: "a" (eax), "c" (ecx), "d"(edx)); +} + +static inline void __monitorx(const void *eax, unsigned long ecx, + unsigned long edx) +{ + /* "monitorx %eax, %ecx, %edx;" */ + asm volatile(".byte 0x0f, 0x01, 0xfa;" + :: "a" (eax), "c" (ecx), "d"(edx)); +} + +static inline void __mwait(unsigned long eax, unsigned long ecx) +{ + mds_idle_clear_cpu_buffers(); + + /* "mwait %eax, %ecx;" */ + asm volatile(".byte 0x0f, 0x01, 0xc9;" + :: "a" (eax), "c" (ecx)); +} + +/* + * MWAITX allows for a timer expiration to get the core out a wait state in + * addition to the default MWAIT exit condition of a store appearing at a + * monitored virtual address. + * + * Registers: + * + * MWAITX ECX[1]: enable timer if set + * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0 + * frequency is the same as the TSC frequency. + * + * Below is a comparison between MWAIT and MWAITX on AMD processors: + * + * MWAIT MWAITX + * opcode 0f 01 c9 | 0f 01 fb + * ECX[0] value of RFLAGS.IF seen by instruction + * ECX[1] unused/#GP if set | enable timer if set + * ECX[31:2] unused/#GP if set + * EAX unused (reserve for hint) + * EBX[31:0] unused | max wait time (P0 clocks) + * + * MONITOR MONITORX + * opcode 0f 01 c8 | 0f 01 fa + * EAX (logical) address to monitor + * ECX #GP if not zero + */ +static inline void __mwaitx(unsigned long eax, unsigned long ebx, + unsigned long ecx) +{ + /* No MDS buffer clear as this is AMD/HYGON only */ + + /* "mwaitx %eax, %ebx, %ecx;" */ + asm volatile(".byte 0x0f, 0x01, 0xfb;" + :: "a" (eax), "b" (ebx), "c" (ecx)); +} + +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ + asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" + :: "a" (eax), "c" (ecx)); +} + +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. + */ +static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +{ + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { + if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { + mb(); + clflush((void *)¤t_thread_info()->flags); + mb(); + } + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + if (!need_resched()) + __mwait(eax, ecx); + } + current_clr_polling(); +} + +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_TPAUSE + asm volatile("tpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif +} + +#endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h new file mode 100644 index 000000000..5c5f1e56c --- /dev/null +++ b/arch/x86/include/asm/nmi.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NMI_H +#define _ASM_X86_NMI_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC + +extern int reserve_perfctr_nmi(unsigned int); +extern void release_perfctr_nmi(unsigned int); +extern int reserve_evntsel_nmi(unsigned int); +extern void release_evntsel_nmi(unsigned int); + +struct ctl_table; +extern int proc_nmi_enabled(struct ctl_table *, int , + void __user *, size_t *, loff_t *); +extern int unknown_nmi_panic; + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#define NMI_FLAG_FIRST 1 + +enum { + NMI_LOCAL=0, + NMI_UNKNOWN, + NMI_SERR, + NMI_IO_CHECK, + NMI_MAX +}; + +#define NMI_DONE 0 +#define NMI_HANDLED 1 + +typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); + +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + u64 max_duration; + unsigned long flags; + const char *name; +}; + +#define register_nmi_handler(t, fn, fg, n, init...) \ +({ \ + static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + +int __register_nmi_handler(unsigned int, struct nmiaction *); + +void unregister_nmi_handler(unsigned int, const char *); + +void stop_nmi(void); +void restart_nmi(void); +void local_touch_nmi(void); + +#endif /* _ASM_X86_NMI_H */ diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h new file mode 100644 index 000000000..c5573eaa5 --- /dev/null +++ b/arch/x86/include/asm/nops.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NOPS_H +#define _ASM_X86_NOPS_H + +#include + +/* + * Define nops for use with alternative() and for tracing. + */ + +#ifndef CONFIG_64BIT + +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 + +#else + +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 + +#endif /* CONFIG_64BIT */ + +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) + +#define ASM_NOP_MAX 8 + +#ifndef __ASSEMBLY__ +extern const unsigned char * const x86_nops[]; +#endif + +#endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 000000000..2f123d4fb --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,425 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define RETPOLINE_THUNK_SIZE 32 + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +/* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#ifdef CONFIG_X86_64 +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif + +/* + * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. + */ +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE + .Lannotate_\@: + .pushsection .discard.retpoline_safe + _ASM_PTR .Lannotate_\@ + .popsection +.endm + +/* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + +/* + * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call + * to the retpoline thunk with a CS prefix when the register requires + * a RAX prefix byte to encode. Also see apply_retpolines(). + */ +.macro __CS_PREFIX reg:req + .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 + .ifc \reg,\rs + .byte 0x2e + .endif + .endr +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + __CS_PREFIX \reg + jmp __x86_indirect_thunk_\reg +#else + jmp *%\reg + int3 +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + __CS_PREFIX \reg + call __x86_indirect_thunk_\reg +#else + call *%\reg +#endif +.endm + + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(__FILL_ONE_RETURN), \ftr2 + +.Lskip_rsb_\@: +.endm + +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_UNTRAIN_RET "call entry_untrain_ret" +#else +#define CALL_UNTRAIN_RET "" +#endif + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CPU_SRSO) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_RETPOLINE_SAFE \ + "999:\n\t" \ + ".pushsection .discard.retpoline_safe\n\t" \ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + +#ifdef CONFIG_RETHUNK +extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void retbleed_untrain_ret(void); +extern void srso_untrain_ret(void); +extern void srso_alias_untrain_ret(void); + +extern void entry_untrain_ret(void); +extern void entry_ibpb(void); + +#ifdef CONFIG_RETPOLINE + +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; +#include +#undef GEN + +#ifdef CONFIG_X86_64 + +/* + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. + */ +# define CALL_NOSPEC \ + ALTERNATIVE_2( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_LFENCE) + +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#else /* CONFIG_X86_32 */ +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC \ + ALTERNATIVE_2( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " lfence;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: lea 4(%%esp), %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_LFENCE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif +#else /* No retpoline for C / inline asm */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, +}; + +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_STRICT_PREFERRED, + SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, +}; + +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, +}; + +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" ((u32)val), + "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); +} + +extern u64 x86_pred_cmd; + +static inline void indirect_branch_prediction_barrier(void) +{ + alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); +} + +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void update_spec_ctrl_cond(u64 val); +extern u64 spec_ctrl_current(void); + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) + */ +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ +} while (0) + +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) + +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + +#include + +/** + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static __always_inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static __always_inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h new file mode 100644 index 000000000..ef2844d69 --- /dev/null +++ b/arch/x86/include/asm/numa.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NUMA_H +#define _ASM_X86_NUMA_H + +#include +#include + +#include +#include + +#ifdef CONFIG_NUMA + +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) + +extern int numa_off; + +/* + * __apicid_to_node[] stores the raw mapping between physical apicid and + * node and is used to initialize cpu_to_node mapping. + * + * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus + * should be accessed by the accessors - set_apicid_to_node() and + * numa_cpu_node(). + */ +extern s16 __apicid_to_node[MAX_LOCAL_APIC]; +extern nodemask_t numa_nodes_parsed __initdata; + +extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); +extern void __init numa_set_distance(int from, int to, int distance); + +static inline void set_apicid_to_node(int apicid, s16 node) +{ + __apicid_to_node[apicid] = node; +} + +extern int numa_cpu_node(int cpu); + +#else /* CONFIG_NUMA */ +static inline void set_apicid_to_node(int apicid, s16 node) +{ +} + +static inline int numa_cpu_node(int cpu) +{ + return NUMA_NO_NODE; +} +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_X86_32 +# include +#endif + +#ifdef CONFIG_NUMA +extern void numa_set_node(int cpu, int node); +extern void numa_clear_node(int cpu); +extern void __init init_cpu_to_node(void); +extern void numa_add_cpu(int cpu); +extern void numa_remove_cpu(int cpu); +extern void init_gi_nodes(void); +#else /* CONFIG_NUMA */ +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +static inline void init_cpu_to_node(void) { } +static inline void numa_add_cpu(int cpu) { } +static inline void numa_remove_cpu(int cpu) { } +static inline void init_gi_nodes(void) { } +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +void debug_cpumask_set_cpu(int cpu, int node, bool enable); +#endif + +#ifdef CONFIG_NUMA_EMU +#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) +#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) +int numa_emu_cmdline(char *str); +#else /* CONFIG_NUMA_EMU */ +static inline int numa_emu_cmdline(char *str) +{ + return -EINVAL; +} +#endif /* CONFIG_NUMA_EMU */ + +#endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h new file mode 100644 index 000000000..9c8e9e85b --- /dev/null +++ b/arch/x86/include/asm/numa_32.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NUMA_32_H +#define _ASM_X86_NUMA_32_H + +#ifdef CONFIG_HIGHMEM +extern void set_highmem_pages_init(void); +#else +static inline void set_highmem_pages_init(void) +{ +} +#endif + +#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h new file mode 100644 index 000000000..c64373a2d --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip.h @@ -0,0 +1,20 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific header file + * + * Copyright (C) 2012 Numascale AS. All rights reserved. + * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H +#define _ASM_X86_NUMACHIP_NUMACHIP_H + +extern u8 numachip_system; +extern int __init pci_numachip_init(void); + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h new file mode 100644 index 000000000..29719eecd --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip_csr.h @@ -0,0 +1,98 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-Specific Header file + * + * Copyright (C) 2011 Numascale AS. All rights reserved. + * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H +#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H + +#include +#include + +#define CSR_NODE_SHIFT 16 +#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT) +#define CSR_NODE_MASK 0x0fff /* 4K nodes */ + +/* 32K CSR space, b15 indicates geo/non-geo */ +#define CSR_OFFSET_MASK 0x7fffUL +#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) +#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) + +/* + * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however + * when using the direct mapping on x86_64, both start and size needs to be + * aligned with PMD_SIZE which is 2M + */ +#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL +#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL +#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1) +#define NUMACHIP_LAPIC_BITS 8 + +static inline void *lcsr_address(unsigned long offset) +{ + return __va(NUMACHIP_LCSR_BASE | (1UL << 15) | + CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK)); +} + +static inline unsigned int read_lcsr(unsigned long offset) +{ + return swab32(readl(lcsr_address(offset))); +} + +static inline void write_lcsr(unsigned long offset, unsigned int val) +{ + writel(swab32(val), lcsr_address(offset)); +} + +/* + * On NumaChip2, local CSR space is 16MB and starts at fixed offset below 4G + */ + +#define NUMACHIP2_LCSR_BASE 0xf0000000UL +#define NUMACHIP2_LCSR_SIZE 0x1000000UL +#define NUMACHIP2_APIC_ICR 0x100000 +#define NUMACHIP2_TIMER_DEADLINE 0x200000 +#define NUMACHIP2_TIMER_INT 0x200008 +#define NUMACHIP2_TIMER_NOW 0x200018 +#define NUMACHIP2_TIMER_RESET 0x200020 + +static inline void __iomem *numachip2_lcsr_address(unsigned long offset) +{ + return (void __iomem *)__va(NUMACHIP2_LCSR_BASE | + (offset & (NUMACHIP2_LCSR_SIZE - 1))); +} + +static inline u32 numachip2_read32_lcsr(unsigned long offset) +{ + return readl(numachip2_lcsr_address(offset)); +} + +static inline u64 numachip2_read64_lcsr(unsigned long offset) +{ + return readq(numachip2_lcsr_address(offset)); +} + +static inline void numachip2_write32_lcsr(unsigned long offset, u32 val) +{ + writel(val, numachip2_lcsr_address(offset)); +} + +static inline void numachip2_write64_lcsr(unsigned long offset, u64 val) +{ + writeq(val, numachip2_lcsr_address(offset)); +} + +static inline unsigned int numachip2_timer(void) +{ + return (smp_processor_id() % 48) << 6; +} + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h new file mode 100644 index 000000000..6fe76282a --- /dev/null +++ b/arch/x86/include/asm/olpc.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* OLPC machine specific definitions */ + +#ifndef _ASM_X86_OLPC_H +#define _ASM_X86_OLPC_H + +#include + +struct olpc_platform_t { + int flags; + uint32_t boardrev; +}; + +#define OLPC_F_PRESENT 0x01 +#define OLPC_F_DCON 0x02 + +#ifdef CONFIG_OLPC + +extern struct olpc_platform_t olpc_platform_info; + +/* + * OLPC board IDs contain the major build number within the mask 0x0ff0, + * and the minor build number within 0x000f. Pre-builds have a minor + * number less than 8, and normal builds start at 8. For example, 0x0B10 + * is a PreB1, and 0x0C18 is a C1. + */ + +static inline uint32_t olpc_board(uint8_t id) +{ + return (id << 4) | 0x8; +} + +static inline uint32_t olpc_board_pre(uint8_t id) +{ + return id << 4; +} + +static inline int machine_is_olpc(void) +{ + return (olpc_platform_info.flags & OLPC_F_PRESENT) ? 1 : 0; +} + +/* + * The DCON is OLPC's Display Controller. It has a number of unique + * features that we might want to take advantage of.. + */ +static inline int olpc_has_dcon(void) +{ + return (olpc_platform_info.flags & OLPC_F_DCON) ? 1 : 0; +} + +/* + * The "Mass Production" version of OLPC's XO is identified as being model + * C2. During the prototype phase, the following models (in chronological + * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models + * were based on Geode GX CPUs, and models after that were based upon + * Geode LX CPUs. There were also some hand-assembled models floating + * around, referred to as PreB1, PreB2, etc. + */ +static inline int olpc_board_at_least(uint32_t rev) +{ + return olpc_platform_info.boardrev >= rev; +} + +#else + +static inline int machine_is_olpc(void) +{ + return 0; +} + +static inline int olpc_has_dcon(void) +{ + return 0; +} + +#endif + +#ifdef CONFIG_OLPC_XO1_PM +extern void do_olpc_suspend_lowlevel(void); +extern void olpc_xo1_pm_wakeup_set(u16 value); +extern void olpc_xo1_pm_wakeup_clear(u16 value); +#endif + +extern int pci_olpc_init(void); + +/* GPIO assignments */ + +#define OLPC_GPIO_MIC_AC 1 +#define OLPC_GPIO_DCON_STAT0 5 +#define OLPC_GPIO_DCON_STAT1 6 +#define OLPC_GPIO_DCON_IRQ 7 +#define OLPC_GPIO_THRM_ALRM geode_gpio(10) +#define OLPC_GPIO_DCON_LOAD 11 +#define OLPC_GPIO_DCON_BLANK 12 +#define OLPC_GPIO_SMB_CLK 14 +#define OLPC_GPIO_SMB_DATA 15 +#define OLPC_GPIO_WORKAUX geode_gpio(24) +#define OLPC_GPIO_LID 26 +#define OLPC_GPIO_ECSCI 27 + +#endif /* _ASM_X86_OLPC_H */ diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h new file mode 100644 index 000000000..8c2a1daf7 --- /dev/null +++ b/arch/x86/include/asm/olpc_ofw.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_OLPC_OFW_H +#define _ASM_X86_OLPC_OFW_H + +/* index into the page table containing the entry OFW occupies */ +#define OLPC_OFW_PDE_NR 1022 + +#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */ + +#ifdef CONFIG_OLPC + +extern bool olpc_ofw_is_installed(void); + +/* run an OFW command by calling into the firmware */ +#define olpc_ofw(name, args, res) \ + __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) + +extern int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, + void **res); + +/* determine whether OFW is available and lives in the proper memory */ +extern void olpc_ofw_detect(void); + +/* install OFW's pde permanently into the kernel's pgtable */ +extern void setup_olpc_ofw_pgd(void); + +/* check if OFW was detected during boot */ +extern bool olpc_ofw_present(void); + +extern void olpc_dt_build_devicetree(void); + +#else /* !CONFIG_OLPC */ +static inline void olpc_ofw_detect(void) { } +static inline void setup_olpc_ofw_pgd(void) { } +static inline void olpc_dt_build_devicetree(void) { } +#endif /* !CONFIG_OLPC */ + +#endif /* _ASM_X86_OLPC_OFW_H */ diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h new file mode 100644 index 000000000..241631282 --- /dev/null +++ b/arch/x86/include/asm/orc_lookup.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + */ +#ifndef _ORC_LOOKUP_H +#define _ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP (unsigned long)_stext +#define LOOKUP_STOP_IP (unsigned long)_etext + +#endif /* LINKER_SCRIPT */ + +#endif /* _ORC_LOOKUP_H */ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h new file mode 100644 index 000000000..5a2baf28a --- /dev/null +++ b/arch/x86/include/asm/orc_types.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + */ + +#ifndef _ORC_TYPES_H +#define _ORC_TYPES_H + +#include +#include + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and BP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is + * usually based on. + * + * The rest of the base registers are needed for special cases like entry code + * and GCC realigned stacks. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_DX 2 +#define ORC_REG_DI 3 +#define ORC_REG_BP 4 +#define ORC_REG_SP 5 +#define ORC_REG_R10 6 +#define ORC_REG_R13 7 +#define ORC_REG_BP_INDIRECT 8 +#define ORC_REG_SP_INDIRECT 9 +#define ORC_REG_MAX 15 + +#ifndef __ASSEMBLY__ +#include + +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and BP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + s16 sp_offset; + s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) + unsigned sp_reg:4; + unsigned bp_reg:4; + unsigned type:2; + unsigned end:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:5; + unsigned end:1; + unsigned type:2; +#endif +} __packed; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h new file mode 100644 index 000000000..9cc82f305 --- /dev/null +++ b/arch/x86/include/asm/page.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_H +#define _ASM_X86_PAGE_H + +#include + +#ifdef __KERNEL__ + +#include + +#ifdef CONFIG_X86_64 +#include +#else +#include +#endif /* CONFIG_X86_64 */ + +#ifndef __ASSEMBLY__ + +struct page; + +#include +extern struct range pfn_mapped[]; +extern int nr_pfn_mapped; + +static inline void clear_user_page(void *page, unsigned long vaddr, + struct page *pg) +{ + clear_page(page); +} + +static inline void copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *topage) +{ + copy_page(to, from); +} + +#define alloc_zeroed_user_highpage_movable(vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE + +#ifndef __pa +#define __pa(x) __phys_addr((unsigned long)(x)) +#endif + +#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) +/* __pa_symbol should be used for C visible symbols. + This seems to be the official gcc blessed way to do such arithmetic. */ +/* + * We need __phys_reloc_hide() here because gcc may assume that there is no + * overflow during __pa() calculation and can optimize it unexpectedly. + * Newer versions of gcc provide -fno-strict-overflow switch to handle this + * case properly. Once all supported versions of gcc understand it, we can + * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) + */ +#define __pa_symbol(x) \ + __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) + +#ifndef __va +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#endif + +#define __boot_va(x) __va(x) +#define __boot_pa(x) __pa(x) + +/* + * virt_to_page(kaddr) returns a valid pointer if and only if + * virt_addr_valid(kaddr) returns true. + */ +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +extern bool __virt_addr_valid(unsigned long kaddr); +#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) + +static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return __canonical_address(vaddr, vaddr_bits) == vaddr; +} + +#endif /* __ASSEMBLY__ */ + +#include +#include + +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_PAGE_H */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h new file mode 100644 index 000000000..df42f8aa9 --- /dev/null +++ b/arch/x86/include/asm/page_32.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_32_H +#define _ASM_X86_PAGE_32_H + +#include + +#ifndef __ASSEMBLY__ + +#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) +#ifdef CONFIG_DEBUG_VIRTUAL +extern unsigned long __phys_addr(unsigned long); +#else +#define __phys_addr(x) __phys_addr_nodebug(x) +#endif +#define __phys_addr_symbol(x) __phys_addr(x) +#define __phys_reloc_hide(x) RELOC_HIDE((x), 0) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_FLATMEM */ + +#include + +static inline void clear_page(void *page) +{ + memset(page, 0, PAGE_SIZE); +} + +static inline void copy_page(void *to, void *from) +{ + memcpy(to, from, PAGE_SIZE); +} +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h new file mode 100644 index 000000000..faf9cc1c1 --- /dev/null +++ b/arch/x86/include/asm/page_32_types.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_32_DEFS_H +#define _ASM_X86_PAGE_32_DEFS_H + +#include + +/* + * This handles the memory map. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ +#define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL) +#define __PAGE_OFFSET __PAGE_OFFSET_BASE + +#define __START_KERNEL_map __PAGE_OFFSET + +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#define IRQ_STACK_SIZE THREAD_SIZE + +#define N_EXCEPTION_STACKS 1 + +#ifdef CONFIG_X86_PAE +/* + * This is beyond the 44 bit limit imposed by the 32bit long pfns, + * but we need the full mask to make sure inverted PROT_NONE + * entries have all the host bits set in a guest. + * The real limit is still 44 bits. + */ +#define __PHYSICAL_MASK_SHIFT 52 +#define __VIRTUAL_MASK_SHIFT 32 + +#else /* !CONFIG_X86_PAE */ +#define __PHYSICAL_MASK_SHIFT 32 +#define __VIRTUAL_MASK_SHIFT 32 +#endif /* CONFIG_X86_PAE */ + +/* + * User space process size: 3GB (default). + */ +#define IA32_PAGE_OFFSET __PAGE_OFFSET +#define TASK_SIZE __PAGE_OFFSET +#define TASK_SIZE_LOW TASK_SIZE +#define TASK_SIZE_MAX TASK_SIZE +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +/* + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. On + * 32-bit, this is not a strict limit, but this value is used to limit the + * link-time virtual address range of the kernel, and by KASLR to limit the + * randomized address from which the kernel is executed. A relocatable kernel + * can be loaded somewhat higher than KERNEL_IMAGE_SIZE as long as enough space + * remains for the vmalloc area. + */ +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) + +#ifndef __ASSEMBLY__ + +/* + * This much address space is reserved for vmalloc() and iomap() + * as well as fixmap mappings. + */ +extern unsigned int __VMALLOC_RESERVE; +extern int sysctl_legacy_va_layout; + +extern void find_low_pfn_range(void); +extern void setup_bootmem_allocator(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h new file mode 100644 index 000000000..198e03e59 --- /dev/null +++ b/arch/x86/include/asm/page_64.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_64_H +#define _ASM_X86_PAGE_64_H + +#include + +#ifndef __ASSEMBLY__ +#include +#include + +#include + +/* duplicated to the one in bootmem.h */ +extern unsigned long max_pfn; +extern unsigned long phys_base; + +extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; +extern unsigned long vmemmap_base; + +static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); + + return x; +} + +#ifdef CONFIG_DEBUG_VIRTUAL +extern unsigned long __phys_addr(unsigned long); +extern unsigned long __phys_addr_symbol(unsigned long); +#else +#define __phys_addr(x) __phys_addr_nodebug(x) +#define __phys_addr_symbol(x) \ + ((unsigned long)(x) - __START_KERNEL_map + phys_base) +#endif + +#define __phys_reloc_hide(x) (x) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_pfn) +#endif + +void clear_page_orig(void *page); +void clear_page_rep(void *page); +void clear_page_erms(void *page); + +static inline void clear_page(void *page) +{ + /* + * Clean up KMSAN metadata for the page being cleared. The assembly call + * below clobbers @page, so we perform unpoisoning before it. + */ + kmsan_unpoison_memory(page, PAGE_SIZE); + alternative_call_2(clear_page_orig, + clear_page_rep, X86_FEATURE_REP_GOOD, + clear_page_erms, X86_FEATURE_ERMS, + "=D" (page), + "0" (page) + : "cc", "memory", "rax", "rcx"); +} + +void copy_page(void *to, void *from); + +#ifdef CONFIG_X86_5LEVEL +/* + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] + */ +static __always_inline unsigned long task_size_max(void) +{ + unsigned long ret; + + alternative_io("movq %[small],%0","movq %[large],%0", + X86_FEATURE_LA57, + "=r" (ret), + [small] "i" ((1ul << 47)-PAGE_SIZE), + [large] "i" ((1ul << 56)-PAGE_SIZE)); + + return ret; +} +#endif /* CONFIG_X86_5LEVEL */ + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_X86_VSYSCALL_EMULATION +# define __HAVE_ARCH_GATE_AREA 1 +#endif + +#endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h new file mode 100644 index 000000000..e9e2c3ba5 --- /dev/null +++ b/arch/x86/include/asm/page_64_types.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_64_DEFS_H +#define _ASM_X86_PAGE_64_DEFS_H + +#ifndef __ASSEMBLY__ +#include +#endif + +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) +#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) + +#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) + +/* + * The index for the tss.ist[] array. The hardware limit is 7 entries. + */ +#define IST_INDEX_DF 0 +#define IST_INDEX_NMI 1 +#define IST_INDEX_DB 2 +#define IST_INDEX_MCE 3 +#define IST_INDEX_VC 4 + +/* + * Set __PAGE_OFFSET to the most negative possible address + + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. + */ +#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) + +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT +#define __PAGE_OFFSET page_offset_base +#else +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ + +#define __START_KERNEL_map _AC(0xffffffff80000000, UL) + +/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */ + +#define __PHYSICAL_MASK_SHIFT 52 + +#ifdef CONFIG_X86_5LEVEL +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) +/* See task_size_max() in */ +#else +#define __VIRTUAL_MASK_SHIFT 47 +#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) +#endif + +#define TASK_SIZE_MAX task_size_max() +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) + +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX + +/* + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. + * This can be at most 1 GiB, due to the fixmap living in the next 1 GiB (see + * level2_kernel_pgt in arch/x86/kernel/head_64.S). + * + * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the + * page tables are fully set up. + * + * If KASLR is disabled we can shrink it to 0.5 GiB and increase the size + * of the modules area to 1.5 GiB. + */ +#ifdef CONFIG_RANDOMIZE_BASE +#define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024) +#else +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#endif + +#endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h new file mode 100644 index 000000000..a506a4114 --- /dev/null +++ b/arch/x86/include/asm/page_types.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PAGE_DEFS_H +#define _ASM_X86_PAGE_DEFS_H + +#include +#include +#include + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) + +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) + +/* Cast *PAGE_MASK to a signed type so that it is sign-extended if + virtual addresses are 32-bits but physical addresses are larger + (ie, 32-bit PAE). */ +#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) + +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HUGE_MAX_HSTATE 2 + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC + +#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ + CONFIG_PHYSICAL_ALIGN) + +#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) + +#ifdef CONFIG_X86_64 +#include +#define IOREMAP_MAX_ORDER (PUD_SHIFT) +#else +#include +#define IOREMAP_MAX_ORDER (PMD_SHIFT) +#endif /* CONFIG_X86_64 */ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +extern phys_addr_t physical_mask; +#define __PHYSICAL_MASK physical_mask +#else +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) +#endif + +extern int devmem_is_allowed(unsigned long pagenr); + +extern unsigned long max_low_pfn_mapped; +extern unsigned long max_pfn_mapped; + +static inline phys_addr_t get_max_mapped(void) +{ + return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; +} + +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); + +extern void initmem_init(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h new file mode 100644 index 000000000..2a0b8dd4e --- /dev/null +++ b/arch/x86/include/asm/paravirt.h @@ -0,0 +1,795 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PARAVIRT_H +#define _ASM_X86_PARAVIRT_H +/* Various instructions on x86 need to be replaced for + * para-virtualization: those hooks are defined here. */ + +#ifdef CONFIG_PARAVIRT +#include +#include +#include + +#include + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include + +u64 dummy_steal_clock(int cpu); +u64 dummy_sched_clock(void); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); +DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); + +void paravirt_set_sched_clock(u64 (*func)(void)); + +static inline u64 paravirt_sched_clock(void) +{ + return static_call(pv_sched_clock)(); +} + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +__visible void __native_queued_spin_unlock(struct qspinlock *lock); +bool pv_is_native_spin_unlock(void); +__visible bool __native_vcpu_is_preempted(long cpu); +bool pv_is_native_vcpu_is_preempted(void); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init paravirt_set_cap(void); +#endif + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) +{ + PVOP_VCALL0(cpu.io_delay); +#ifdef REALLY_SLOW_IO + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); +#endif +} + +void native_flush_tlb_local(void); +void native_flush_tlb_global(void); +void native_flush_tlb_one_user(unsigned long addr); +void native_flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline void __flush_tlb_local(void) +{ + PVOP_VCALL0(mmu.flush_tlb_user); +} + +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(mmu.flush_tlb_kernel); +} + +static inline void __flush_tlb_one_user(unsigned long addr) +{ + PVOP_VCALL1(mmu.flush_tlb_one_user, addr); +} + +static inline void __flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); +} + +static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); +} + +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ + PVOP_VCALL1(mmu.exit_mmap, mm); +} + +static inline void notify_page_enc_status_changed(unsigned long pfn, + int npages, bool enc) +{ + PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); +} + +#ifdef CONFIG_PARAVIRT_XXL +static inline void load_sp0(unsigned long sp0) +{ + PVOP_VCALL1(cpu.load_sp0, sp0); +} + +/* The paravirtualized CPUID instruction. */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); +} + +/* + * These special macros can be used to get or set a debugging register + */ +static __always_inline unsigned long paravirt_get_debugreg(int reg) +{ + return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); +} +#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) +static __always_inline void set_debugreg(unsigned long val, int reg) +{ + PVOP_VCALL2(cpu.set_debugreg, reg, val); +} + +static inline unsigned long read_cr0(void) +{ + return PVOP_CALL0(unsigned long, cpu.read_cr0); +} + +static inline void write_cr0(unsigned long x) +{ + PVOP_VCALL1(cpu.write_cr0, x); +} + +static __always_inline unsigned long read_cr2(void) +{ + return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, + "mov %%cr2, %%rax;", + ALT_NOT(X86_FEATURE_XENPV)); +} + +static __always_inline void write_cr2(unsigned long x) +{ + PVOP_VCALL1(mmu.write_cr2, x); +} + +static inline unsigned long __read_cr3(void) +{ + return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, + "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline void write_cr3(unsigned long x) +{ + PVOP_ALT_VCALL1(mmu.write_cr3, x, + "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline void __write_cr4(unsigned long x) +{ + PVOP_VCALL1(cpu.write_cr4, x); +} + +static inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); +} + +static inline void wbinvd(void) +{ + PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline u64 paravirt_read_msr(unsigned msr) +{ + return PVOP_CALL1(u64, cpu.read_msr, msr); +} + +static inline void paravirt_write_msr(unsigned msr, + unsigned low, unsigned high) +{ + PVOP_VCALL3(cpu.write_msr, msr, low, high); +} + +static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); +} + +static inline int paravirt_write_msr_safe(unsigned msr, + unsigned low, unsigned high) +{ + return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); +} + +#define rdmsr(msr, val1, val2) \ +do { \ + u64 _l = paravirt_read_msr(msr); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ +} while (0) + +#define wrmsr(msr, val1, val2) \ +do { \ + paravirt_write_msr(msr, val1, val2); \ +} while (0) + +#define rdmsrl(msr, val) \ +do { \ + val = paravirt_read_msr(msr); \ +} while (0) + +static inline void wrmsrl(unsigned msr, u64 val) +{ + wrmsr(msr, (u32)val, (u32)(val>>32)); +} + +#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr, a, b) \ +({ \ + int _err; \ + u64 _l = paravirt_read_msr_safe(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; \ +}) + +static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = paravirt_read_msr_safe(msr, &err); + return err; +} + +static inline unsigned long long paravirt_read_pmc(int counter) +{ + return PVOP_CALL1(u64, cpu.read_pmc, counter); +} + +#define rdpmc(counter, low, high) \ +do { \ + u64 _l = paravirt_read_pmc(counter); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while (0) + +#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(cpu.free_ldt, ldt, entries); +} + +static inline void load_TR_desc(void) +{ + PVOP_VCALL0(cpu.load_tr_desc); +} +static inline void load_gdt(const struct desc_ptr *dtr) +{ + PVOP_VCALL1(cpu.load_gdt, dtr); +} +static inline void load_idt(const struct desc_ptr *dtr) +{ + PVOP_VCALL1(cpu.load_idt, dtr); +} +static inline void set_ldt(const void *addr, unsigned entries) +{ + PVOP_VCALL2(cpu.set_ldt, addr, entries); +} +static inline unsigned long paravirt_store_tr(void) +{ + return PVOP_CALL0(unsigned long, cpu.store_tr); +} + +#define store_tr(tr) ((tr) = paravirt_store_tr()) +static inline void load_TLS(struct thread_struct *t, unsigned cpu) +{ + PVOP_VCALL2(cpu.load_tls, t, cpu); +} + +static inline void load_gs_index(unsigned int gs) +{ + PVOP_VCALL1(cpu.load_gs_index, gs); +} + +static inline void write_ldt_entry(struct desc_struct *dt, int entry, + const void *desc) +{ + PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); +} + +static inline void write_gdt_entry(struct desc_struct *dt, int entry, + void *desc, int type) +{ + PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); +} + +static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) +{ + PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); +} + +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_invalidate_io_bitmap(void) +{ + PVOP_VCALL0(cpu.invalidate_io_bitmap); +} + +static inline void tss_update_io_bitmap(void) +{ + PVOP_VCALL0(cpu.update_io_bitmap); +} +#endif + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + PVOP_VCALL2(mmu.activate_mm, prev, next); +} + +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); +} + +static inline int paravirt_pgd_alloc(struct mm_struct *mm) +{ + return PVOP_CALL1(int, mmu.pgd_alloc, mm); +} + +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + PVOP_VCALL2(mmu.pgd_free, mm, pgd); +} + +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(mmu.alloc_pte, mm, pfn); +} +static inline void paravirt_release_pte(unsigned long pfn) +{ + PVOP_VCALL1(mmu.release_pte, pfn); +} + +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); +} + +static inline void paravirt_release_pmd(unsigned long pfn) +{ + PVOP_VCALL1(mmu.release_pmd, pfn); +} + +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(mmu.alloc_pud, mm, pfn); +} +static inline void paravirt_release_pud(unsigned long pfn) +{ + PVOP_VCALL1(mmu.release_pud, pfn); +} + +static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); +} + +static inline void paravirt_release_p4d(unsigned long pfn) +{ + PVOP_VCALL1(mmu.release_p4d, pfn); +} + +static inline pte_t __pte(pteval_t val) +{ + return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; +} + +static inline pteval_t pte_val(pte_t pte) +{ + return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline pgd_t __pgd(pgdval_t val) +{ + return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; +} + +static inline pgdval_t pgd_val(pgd_t pgd) +{ + return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + pteval_t ret; + + ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); + + return (pte_t) { .pte = ret }; +} + +static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) +{ + + PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); +} + +static inline pmd_t __pmd(pmdval_t val) +{ + return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)) }; +} + +static inline pmdval_t pmd_val(pmd_t pmd) +{ + return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); +} + +static inline pud_t __pud(pudval_t val) +{ + pudval_t ret; + + ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + + return (pud_t) { ret }; +} + +static inline pudval_t pud_val(pud_t pud) +{ + return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, native_make_pud(0)); +} + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + p4dval_t val = native_p4d_val(p4d); + + PVOP_VCALL2(mmu.set_p4d, p4dp, val); +} + +#if CONFIG_PGTABLE_LEVELS >= 5 + +static inline p4d_t __p4d(p4dval_t val) +{ + p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, + "mov %%rdi, %%rax", + ALT_NOT(X86_FEATURE_XENPV)); + + return (p4d_t) { ret }; +} + +static inline p4dval_t p4d_val(p4d_t p4d) +{ + return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, + "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); +} + +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); +} + +#define set_pgd(pgdp, pgdval) do { \ + if (pgtable_l5_enabled()) \ + __set_pgd(pgdp, pgdval); \ + else \ + set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ +} while (0) + +#define pgd_clear(pgdp) do { \ + if (pgtable_l5_enabled()) \ + set_pgd(pgdp, native_make_pgd(0)); \ +} while (0) + +#endif /* CONFIG_PGTABLE_LEVELS == 5 */ + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, native_make_p4d(0)); +} + +static inline void set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_pte(ptep, pte); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + set_pte(ptep, native_make_pte(0)); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, native_make_pmd(0)); +} + +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(struct task_struct *prev) +{ + PVOP_VCALL1(cpu.start_context_switch, prev); +} + +static inline void arch_end_context_switch(struct task_struct *next) +{ + PVOP_VCALL1(cpu.end_context_switch, next); +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE +static inline void arch_enter_lazy_mmu_mode(void) +{ + PVOP_VCALL0(mmu.lazy_mode.enter); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + PVOP_VCALL0(mmu.lazy_mode.leave); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL0(mmu.lazy_mode.flush); +} + +static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) +{ + pv_ops.mmu.set_fixmap(idx, phys, flags); +} +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) + +static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, + u32 val) +{ + PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); +} + +static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, + "movb $0, (%%" _ASM_ARG1 ");", + ALT_NOT(X86_FEATURE_PVUNLOCK)); +} + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + PVOP_VCALL2(lock.wait, ptr, val); +} + +static __always_inline void pv_kick(int cpu) +{ + PVOP_VCALL1(lock.kick, cpu); +} + +static __always_inline bool pv_vcpu_is_preempted(long cpu) +{ + return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, + "xor %%" _ASM_AX ", %%" _ASM_AX ";", + ALT_NOT(X86_FEATURE_VCPUPREEMPT)); +} + +void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); +bool __raw_callee_save___native_vcpu_is_preempted(long cpu); + +#endif /* SMP && PARAVIRT_SPINLOCKS */ + +#ifdef CONFIG_X86_32 +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" +#else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" +#endif + +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_THUNK_NAME(func) "__raw_callee_save_" #func +#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ + extern typeof(func) __raw_callee_save_##func; \ + \ + asm(".pushsection " section ", \"ax\";" \ + ".globl " PV_THUNK_NAME(func) ";" \ + ".type " PV_THUNK_NAME(func) ", @function;" \ + PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ + FRAME_BEGIN \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + FRAME_END \ + ASM_RET \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ + ".popsection") + +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + +#ifdef CONFIG_PARAVIRT_XXL +static __always_inline unsigned long arch_local_save_flags(void) +{ + return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", + ALT_NOT(X86_FEATURE_XENPV)); +} + +static __always_inline void arch_local_irq_disable(void) +{ + PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV)); +} + +static __always_inline void arch_local_irq_enable(void) +{ + PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV)); +} + +static __always_inline unsigned long arch_local_irq_save(void) +{ + unsigned long f; + + f = arch_local_save_flags(); + arch_local_irq_disable(); + return f; +} +#endif + + +/* Make sure as little as possible of this mess escapes. */ +#undef PARAVIRT_CALL +#undef __PVOP_CALL +#undef __PVOP_VCALL +#undef PVOP_VCALL0 +#undef PVOP_CALL0 +#undef PVOP_VCALL1 +#undef PVOP_CALL1 +#undef PVOP_VCALL2 +#undef PVOP_CALL2 +#undef PVOP_VCALL3 +#undef PVOP_CALL3 +#undef PVOP_VCALL4 +#undef PVOP_CALL4 + +extern void default_banner(void); + +#else /* __ASSEMBLY__ */ + +#define _PVSITE(ptype, ops, word, algn) \ +771:; \ + ops; \ +772:; \ + .pushsection .parainstructions,"a"; \ + .align algn; \ + word 771b; \ + .byte ptype; \ + .byte 772b-771b; \ + _ASM_ALIGN; \ + .popsection + + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT_XXL + +#define PARA_PATCH(off) ((off) / 8) +#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) +#define PARA_INDIRECT(addr) *addr(%rip) + +#ifdef CONFIG_DEBUG_ENTRY +.macro PARA_IRQ_save_fl + PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), + ANNOTATE_RETPOLINE_SAFE; + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);) +.endm + +#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \ + ALT_NOT(X86_FEATURE_XENPV) +#endif +#endif /* CONFIG_PARAVIRT_XXL */ +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLY__ */ +#else /* CONFIG_PARAVIRT */ +# define default_banner x86_init_noop +#endif /* !CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#ifndef CONFIG_PARAVIRT_XXL +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} +#endif + +#ifndef CONFIG_PARAVIRT +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ +} +#endif + +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void paravirt_set_cap(void) +{ +} +#endif +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000..65ac7cee0 --- /dev/null +++ b/arch/x86/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h new file mode 100644 index 000000000..f3d601574 --- /dev/null +++ b/arch/x86/include/asm/paravirt_types.h @@ -0,0 +1,608 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PARAVIRT_TYPES_H +#define _ASM_X86_PARAVIRT_TYPES_H + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_EAX (1 << 0) +#define CLBR_ECX (1 << 1) +#define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) + +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) +#define CLBR_R8 (1 << 5) +#define CLBR_R9 (1 << 6) +#define CLBR_R10 (1 << 7) +#define CLBR_R11 (1 << 8) + +#define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX) + +#endif /* X86_64 */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +struct page; +struct thread_struct; +struct desc_ptr; +struct tss_struct; +struct mm_struct; +struct desc_struct; +struct task_struct; +struct cpumask; +struct flush_tlb_info; +struct mmu_gather; +struct vm_area_struct; + +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + +/* general info */ +struct pv_info { +#ifdef CONFIG_PARAVIRT_XXL + u16 extra_user_64bit_cs; /* __USER_CS if none */ +#endif + + const char *name; +}; + +#ifdef CONFIG_PARAVIRT_XXL +struct pv_lazy_ops { + /* Set deferred update mode, used for batching operations. */ + void (*enter)(void); + void (*leave)(void); + void (*flush)(void); +} __no_randomize_layout; +#endif + +struct pv_cpu_ops { + /* hooks for various privileged instructions */ + void (*io_delay)(void); + +#ifdef CONFIG_PARAVIRT_XXL + unsigned long (*get_debugreg)(int regno); + void (*set_debugreg)(int regno, unsigned long value); + + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); + + void (*write_cr4)(unsigned long); + + /* Segment descriptor handling */ + void (*load_tr_desc)(void); + void (*load_gdt)(const struct desc_ptr *); + void (*load_idt)(const struct desc_ptr *); + void (*set_ldt)(const void *desc, unsigned entries); + unsigned long (*store_tr)(void); + void (*load_tls)(struct thread_struct *t, unsigned int cpu); + void (*load_gs_index)(unsigned int idx); + void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, + const void *desc); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, const void *desc, int size); + void (*write_idt_entry)(gate_desc *, + int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + + void (*load_sp0)(unsigned long sp0); + +#ifdef CONFIG_X86_IOPL_IOPERM + void (*invalidate_io_bitmap)(void); + void (*update_io_bitmap)(void); +#endif + + void (*wbinvd)(void); + + /* cpuid emulation, mostly so that caps bits can be disabled */ + void (*cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + /* Unsafe MSR operations. These will warn or panic on failure. */ + u64 (*read_msr)(unsigned int msr); + void (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + /* + * Safe MSR operations. + * read sets err to 0 or -EIO. write returns 0 or -EIO. + */ + u64 (*read_msr_safe)(unsigned int msr, int *err); + int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); + + u64 (*read_pmc)(int counter); + + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); +#endif +} __no_randomize_layout; + +struct pv_irq_ops { +#ifdef CONFIG_PARAVIRT_XXL + /* + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. + */ + struct paravirt_callee_save save_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + + void (*safe_halt)(void); + void (*halt)(void); +#endif +} __no_randomize_layout; + +struct pv_mmu_ops { + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_one_user)(unsigned long addr); + void (*flush_tlb_multi)(const struct cpumask *cpus, + const struct flush_tlb_info *info); + + void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); + + /* Hook for intercepting the destruction of an mm_struct. */ + void (*exit_mmap)(struct mm_struct *mm); + void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); + +#ifdef CONFIG_PARAVIRT_XXL + struct paravirt_callee_save read_cr2; + void (*write_cr2)(unsigned long); + + unsigned long (*read_cr3)(void); + void (*write_cr3)(unsigned long); + + /* Hooks for intercepting the creation/use of an mm_struct. */ + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + + /* Hooks for allocating and freeing a pagetable top-level */ + int (*pgd_alloc)(struct mm_struct *mm); + void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); + + /* + * Hooks for allocating/releasing pagetable pages when they're + * attached to a pagetable + */ + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_p4d)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); + void (*release_p4d)(unsigned long pfn); + + /* Pagetable manipulation functions */ + void (*set_pte)(pte_t *ptep, pte_t pteval); + void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + + pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep); + void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t pte); + + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; + + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; + + void (*set_pud)(pud_t *pudp, pud_t pudval); + + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; + + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; + + void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval); + +#if CONFIG_PGTABLE_LEVELS >= 5 + struct paravirt_callee_save p4d_val; + struct paravirt_callee_save make_p4d; + + void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval); +#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ + + struct pv_lazy_ops lazy_mode; + + /* dom0 ops */ + + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags); +#endif +} __no_randomize_layout; + +struct arch_spinlock; +#ifdef CONFIG_SMP +#include +#endif + +struct qspinlock; + +struct pv_lock_ops { + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + struct paravirt_callee_save queued_spin_unlock; + + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); + + struct paravirt_callee_save vcpu_is_preempted; +} __no_randomize_layout; + +/* This contains all the paravirt structures: we get a convenient + * number for each function using the offset which we use to indicate + * what to patch. */ +struct paravirt_patch_template { + struct pv_cpu_ops cpu; + struct pv_irq_ops irq; + struct pv_mmu_ops mmu; + struct pv_lock_ops lock; +} __no_randomize_layout; + +extern struct pv_info pv_info; +extern struct paravirt_patch_template pv_ops; + +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) + +#define paravirt_type(op) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ + [paravirt_opptr] "m" (pv_ops.op) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + _ASM_ALIGN "\n" \ + ".popsection\n" + +/* Generate patchable code, with the default asm parameters. */ +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + +/* Simple instruction patching code. */ +#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" + +unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len); + +int paravirt_disable_iospace(void); + +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. + */ +#define PARAVIRT_CALL \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[paravirt_opptr];" + +/* + * These macros are intended to wrap calls through one of the paravirt + * ops structs, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (pv_op_struct.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to a simple direct call, or, + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * For i386, these macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * X86_64, on the other hand, already specifies a register-based calling + * conventions, returning at %rax, with parameters going in %rdi, %rsi, + * %rdx, and %rcx. Note that for this reason, x86_64 does not need any + * special handling for dealing with 4 arguments, unlike i386. + * However, x86_64 also has to clobber all caller saved registers, which + * unfortunately, are quite a bit (r8 - r11) + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend pv_init_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. + * It could be extended to more arguments, but there would be little + * to be gained from that. For each number of arguments, there are + * two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether it's a 32 or 64 bit value and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). For x86_64 machines, it just returns in %rax regardless of + * the return value size. + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; + * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments + * in low,high order + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do it. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ +#ifdef CONFIG_X86_32 +#define PVOP_CALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + +#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + +#define EXTRA_CLOBBERS +#define VEXTRA_CLOBBERS +#else /* CONFIG_X86_64 */ +/* [re]ax isn't an arg, but the return val */ +#define PVOP_CALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx, __eax = __eax; + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + +#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ + "=S" (__esi), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) + +/* + * void functions are still allowed [re]ax for scratch. + * + * The ZERO_CALL_USED REGS feature may end up zeroing out callee-saved + * registers. Make sure we model this with the appropriate clobbers. + */ +#ifdef CONFIG_ZERO_CALL_USED_REGS +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS +#else +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#endif +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + +#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" +#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_PARAVIRT_DEBUG +#define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL) +#else +#define PVOP_TEST_NULL(op) ((void)pv_ops.op) +#endif + +#define PVOP_RETVAL(rettype) \ + ({ unsigned long __mask = ~0UL; \ + BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \ + switch (sizeof(rettype)) { \ + case 1: __mask = 0xffUL; break; \ + case 2: __mask = 0xffffUL; break; \ + case 4: __mask = 0xffffffffUL; break; \ + default: break; \ + } \ + __mask & __eax; \ + }) + + +#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...) \ + ({ \ + PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : call_clbr, ASM_CALL_CONSTRAINT \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + ret; \ + }) + +#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \ + extra_clbr, ...) \ + ({ \ + PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \ + alt, cond) \ + : call_clbr, ASM_CALL_CONSTRAINT \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + ret; \ + }) + +#define __PVOP_CALL(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) + +#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ + CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + + +#define __PVOP_VCALL(op, ...) \ + (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, ##__VA_ARGS__) + +#define __PVOP_ALT_VCALL(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY, \ + PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(op, ...) \ + (void)____PVOP_CALL(, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) + + +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op) +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op) +#define PVOP_ALT_CALL0(rettype, op, alt, cond) \ + __PVOP_ALT_CALL(rettype, op, alt, cond) +#define PVOP_ALT_VCALL0(op, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond) + +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op) +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op) +#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond) +#define PVOP_ALT_VCALLEE0(op, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond) + + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1)) + + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) + +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + +void paravirt_enter_lazy_mmu(void); +void paravirt_leave_lazy_mmu(void); +void paravirt_flush_lazy_mmu(void); + +void _paravirt_nop(void); +void paravirt_BUG(void); +u64 _paravirt_ident_64(u64); +unsigned long paravirt_ret0(void); + +#define paravirt_nop ((void *)_paravirt_nop) + +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch_site { + u8 *instr; /* original instructions */ + u8 type; /* type of this instruction */ + u8 len; /* length of original instruction */ +}; + +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/parport.h b/arch/x86/include/asm/parport.h new file mode 100644 index 000000000..163f78259 --- /dev/null +++ b/arch/x86/include/asm/parport.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PARPORT_H +#define _ASM_X86_PARPORT_H + +static int parport_pc_find_isa_ports(int autoirq, int autodma); +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return parport_pc_find_isa_ports(autoirq, autodma); +} + +#endif /* _ASM_X86_PARPORT_H */ diff --git a/arch/x86/include/asm/pc-conf-reg.h b/arch/x86/include/asm/pc-conf-reg.h new file mode 100644 index 000000000..56bceceac --- /dev/null +++ b/arch/x86/include/asm/pc-conf-reg.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for the configuration register space at port I/O locations + * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec, + * Cyrix CPUs, numerous chipsets. + */ +#ifndef _ASM_X86_PC_CONF_REG_H +#define _ASM_X86_PC_CONF_REG_H + +#include +#include +#include + +#define PC_CONF_INDEX 0x22 +#define PC_CONF_DATA 0x23 + +#define PC_CONF_MPS_IMCR 0x70 + +extern raw_spinlock_t pc_conf_lock; + +static inline u8 pc_conf_get(u8 reg) +{ + outb(reg, PC_CONF_INDEX); + return inb(PC_CONF_DATA); +} + +static inline void pc_conf_set(u8 reg, u8 data) +{ + outb(reg, PC_CONF_INDEX); + outb(data, PC_CONF_DATA); +} + +#endif /* _ASM_X86_PC_CONF_REG_H */ diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h new file mode 100644 index 000000000..94597a3cf --- /dev/null +++ b/arch/x86/include/asm/pci-direct.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PCI_DIRECT_H +#define _ASM_X86_PCI_DIRECT_H + +#include + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); +extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); +extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); +extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); +extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); +extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); + +extern int early_pci_allowed(void); +#endif /* _ASM_X86_PCI_DIRECT_H */ diff --git a/arch/x86/include/asm/pci-functions.h b/arch/x86/include/asm/pci-functions.h new file mode 100644 index 000000000..1bbc10812 --- /dev/null +++ b/arch/x86/include/asm/pci-functions.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCI BIOS function numbering for conventional PCI BIOS + * systems + */ + +#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +#define PCIBIOS_READ_CONFIG_WORD 0xb109 +#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d +#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e +#define PCIBIOS_SET_PCI_HW_INT 0xb10f + diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h new file mode 100644 index 000000000..736793d65 --- /dev/null +++ b/arch/x86/include/asm/pci.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PCI_H +#define _ASM_X86_PCI_H + +#include /* for struct page */ +#include +#include +#include +#include +#include +#include +#include +#include + +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ +#ifdef CONFIG_ACPI + struct acpi_device *companion; /* ACPI companion device */ +#endif +#ifdef CONFIG_X86_64 + void *iommu; /* IOMMU private data */ +#endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN + void *fwnode; /* IRQ domain for MSI assignment */ +#endif +#if IS_ENABLED(CONFIG_VMD) + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ +#endif +}; + +extern int pci_routeirq; +extern int noioapicquirk; +extern int noioapicreroute; + +static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) +{ + return bus->sysdata; +} + +#ifdef CONFIG_PCI + +#ifdef CONFIG_PCI_DOMAINS +static inline int pci_domain_nr(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} +#endif + +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->fwnode; +} + +#define pci_root_bus_fwnode _pci_root_bus_fwnode +#endif + +#if IS_ENABLED(CONFIG_VMD) +static inline bool is_vmd(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->vmd_dev != NULL; +} +#else +#define is_vmd(bus) false +#endif /* CONFIG_VMD */ + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +extern unsigned int pcibios_assign_all_busses(void); +extern int pci_legacy_init(void); +#else +static inline int pcibios_assign_all_busses(void) { return 0; } +#endif + +extern unsigned long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +extern int pcibios_enabled; +void pcibios_scan_root(int bus); + +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + + +#define HAVE_PCI_MMAP +#define arch_can_pci_mmap_wc() pat_enabled() +#define ARCH_GENERIC_PCI_MMAP_RESOURCE + +#ifdef CONFIG_PCI +extern void early_quirks(void); +#else +static inline void early_quirks(void) { } +#endif + +extern void pci_iommu_alloc(void); + +#ifdef CONFIG_NUMA +/* Returns the node based on pci bus */ +static inline int __pcibus_to_node(const struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->node; +} + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + int node; + + node = __pcibus_to_node(bus); + return (node == NUMA_NO_NODE) ? cpu_online_mask : + cpumask_of_node(node); +} +#endif + +struct pci_setup_rom { + struct setup_data data; + uint16_t vendor; + uint16_t devid; + uint64_t pcilen; + unsigned long segment; + unsigned long bus; + unsigned long device; + unsigned long function; + uint8_t romdata[]; +}; + +#endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h new file mode 100644 index 000000000..70533fdcb --- /dev/null +++ b/arch/x86/include/asm/pci_x86.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__) +#else +#define DBG(fmt, ...) \ +do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 + +#define PCI_NO_CHECKS 0x0400 +#define PCI_USE_PIRQ_MASK 0x0800 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 +#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 +#define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 +#define PCI_NOASSIGN_ROMS 0x80000 +#define PCI_ROOT_NO_CRS 0x100000 +#define PCI_NOASSIGN_BARS 0x200000 +#define PCI_BIG_ROOT_WINDOW 0x400000 +#define PCI_USE_E820 0x800000 +#define PCI_NO_E820 0x1000000 + +extern unsigned int pci_probe; +extern unsigned long pirq_table_addr; + +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + +/* pci-i386.c */ + +void pcibios_resource_survey(void); +void pcibios_set_cache_line_size(void); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_ops pci_root_ops; + +void pcibios_scan_specific_bus(int busn); + +/* pci-irq.c */ + +struct pci_dev; + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give 0 */ + struct irq_info slots[]; +} __attribute__((packed)); + +struct irt_routing_table { + u32 signature; /* IRT_SIGNATURE should be here */ + u8 size; /* Number of entries provided */ + u8 used; /* Number of entries actually used */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + struct irq_info slots[]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +extern raw_spinlock_t pci_config_lock; + +extern int (*pcibios_enable_irq)(struct pci_dev *dev); +extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +extern bool mp_should_keep_irq(struct device *dev); + +struct pci_raw_ops { + int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val); + int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val); +}; + +extern const struct pci_raw_ops *raw_pci_ops; +extern const struct pci_raw_ops *raw_pci_ext_ops; + +extern const struct pci_raw_ops pci_mmcfg; +extern const struct pci_raw_ops pci_direct_conf1; +extern bool port_cf9_safe; + +/* arch_initcall level */ +#ifdef CONFIG_PCI_DIRECT +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); +#else +static inline int pci_direct_probe(void) { return -1; } +static inline void pci_direct_init(int type) { } +#endif + +#ifdef CONFIG_PCI_BIOS +extern void pci_pcbios_init(void); +#else +static inline void pci_pcbios_init(void) { } +#endif + +extern void __init dmi_check_pciprobe(void); +extern void __init dmi_check_skip_isa_align(void); + +/* some common used subsys_initcalls */ +#ifdef CONFIG_PCI +extern int __init pci_acpi_init(void); +#else +static inline int __init pci_acpi_init(void) +{ + return -EINVAL; +} +#endif +extern void __init pcibios_irq_init(void); +extern int __init pcibios_init(void); +extern int pci_legacy_init(void); +extern void pcibios_fixup_irqs(void); + +/* pci-mmconfig.c */ + +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */ +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) + +struct pci_mmcfg_region { + struct list_head list; + struct resource res; + u64 address; + char __iomem *virt; + u16 segment; + u8 start_bus; + u8 end_bus; + char name[PCI_MMCFG_RESOURCE_NAME_LEN]; +}; + +extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); +extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); +extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg); +extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, + phys_addr_t addr); +extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); +extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); +extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr); + +extern struct list_head pci_mmcfg_list; + +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) + +/* + * On AMD Fam10h CPUs, all PCI MMIO configuration space accesses must use + * %eax. No other source or target registers may be used. The following + * mmio_config_* accessors enforce this. See "BIOS and Kernel Developer's + * Guide (BKDG) For AMD Family 10h Processors", rev. 3.48, sec 2.11.1, + * "MMIO Configuration Coding Requirements". + */ +static inline unsigned char mmio_config_readb(void __iomem *pos) +{ + u8 val; + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned short mmio_config_readw(void __iomem *pos) +{ + u16 val; + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned int mmio_config_readl(void __iomem *pos) +{ + u32 val; + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline void mmio_config_writeb(void __iomem *pos, u8 val) +{ + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writew(void __iomem *pos, u16 val) +{ + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writel(void __iomem *pos, u32 val) +{ + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +#ifdef CONFIG_PCI +# ifdef CONFIG_ACPI +# define x86_default_pci_init pci_acpi_init +# else +# define x86_default_pci_init pci_legacy_init +# endif +# define x86_default_pci_init_irq pcibios_irq_init +# define x86_default_pci_fixup_irqs pcibios_fixup_irqs +#else +# define x86_default_pci_init NULL +# define x86_default_pci_init_irq NULL +# define x86_default_pci_fixup_irqs NULL +#endif + +#if defined(CONFIG_PCI) && defined(CONFIG_ACPI) +extern bool pci_use_e820; +#else +#define pci_use_e820 false +#endif diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h new file mode 100644 index 000000000..13c0d63ed --- /dev/null +++ b/arch/x86/include/asm/percpu.h @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PERCPU_H +#define _ASM_X86_PERCPU_H + +#ifdef CONFIG_X86_64 +#define __percpu_seg gs +#else +#define __percpu_seg fs +#endif + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_SMP +#define PER_CPU_VAR(var) %__percpu_seg:var +#else /* ! SMP */ +#define PER_CPU_VAR(var) var +#endif /* SMP */ + +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) var +#endif + +#else /* ...!ASSEMBLY */ + +#include +#include + +#ifdef CONFIG_SMP +#define __percpu_prefix "%%"__stringify(__percpu_seg)":" +#define __my_cpu_offset this_cpu_read(this_cpu_off) + +/* + * Compared to the generic __my_cpu_offset version, the following + * saves one instruction and avoids clobbering a temp register. + */ +#define arch_raw_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + asm ("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +}) +#else +#define __percpu_prefix "" +#endif + +#define __percpu_arg(x) __percpu_prefix "%" #x + +/* + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. + * + * There also must be an entry in vmlinux_64.lds.S + */ +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(var) init_per_cpu_var(var) + +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) var +#endif + +/* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ + +#define __pcpu_type_1 u8 +#define __pcpu_type_2 u16 +#define __pcpu_type_4 u32 +#define __pcpu_type_8 u64 + +#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff)) +#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff)) +#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff)) +#define __pcpu_cast_8(val) ((u64)(val)) + +#define __pcpu_op1_1(op, dst) op "b " dst +#define __pcpu_op1_2(op, dst) op "w " dst +#define __pcpu_op1_4(op, dst) op "l " dst +#define __pcpu_op1_8(op, dst) op "q " dst + +#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst +#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst +#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst +#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst + +#define __pcpu_reg_1(mod, x) mod "q" (x) +#define __pcpu_reg_2(mod, x) mod "r" (x) +#define __pcpu_reg_4(mod, x) mod "r" (x) +#define __pcpu_reg_8(mod, x) mod "r" (x) + +#define __pcpu_reg_imm_1(x) "qi" (x) +#define __pcpu_reg_imm_2(x) "ri" (x) +#define __pcpu_reg_imm_4(x) "ri" (x) +#define __pcpu_reg_imm_8(x) "re" (x) + +#define percpu_to_op(size, qual, op, _var, _val) \ +do { \ + __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ + if (0) { \ + typeof(_var) pto_tmp__; \ + pto_tmp__ = (_val); \ + (void)pto_tmp__; \ + } \ + asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \ + : [var] "+m" (_var) \ + : [val] __pcpu_reg_imm_##size(pto_val__)); \ +} while (0) + +#define percpu_unary_op(size, qual, op, _var) \ +({ \ + asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \ + : [var] "+m" (_var)); \ +}) + +/* + * Generate a percpu add to memory instruction and optimize code + * if one is added or subtracted. + */ +#define percpu_add_op(size, qual, var, val) \ +do { \ + const int pao_ID__ = (__builtin_constant_p(val) && \ + ((val) == 1 || (val) == -1)) ? \ + (int)(val) : 0; \ + if (0) { \ + typeof(var) pao_tmp__; \ + pao_tmp__ = (val); \ + (void)pao_tmp__; \ + } \ + if (pao_ID__ == 1) \ + percpu_unary_op(size, qual, "inc", var); \ + else if (pao_ID__ == -1) \ + percpu_unary_op(size, qual, "dec", var); \ + else \ + percpu_to_op(size, qual, "add", var, val); \ +} while (0) + +#define percpu_from_op(size, qual, op, _var) \ +({ \ + __pcpu_type_##size pfo_val__; \ + asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \ + : [val] __pcpu_reg_##size("=", pfo_val__) \ + : [var] "m" (_var)); \ + (typeof(_var))(unsigned long) pfo_val__; \ +}) + +#define percpu_stable_op(size, op, _var) \ +({ \ + __pcpu_type_##size pfo_val__; \ + asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \ + : [val] __pcpu_reg_##size("=", pfo_val__) \ + : [var] "p" (&(_var))); \ + (typeof(_var))(unsigned long) pfo_val__; \ +}) + +/* + * Add return operation + */ +#define percpu_add_return_op(size, qual, _var, _val) \ +({ \ + __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \ + asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \ + __percpu_arg([var])) \ + : [tmp] __pcpu_reg_##size("+", paro_tmp__), \ + [var] "+m" (_var) \ + : : "memory"); \ + (typeof(_var))(unsigned long) (paro_tmp__ + _val); \ +}) + +/* + * xchg is implemented using cmpxchg without a lock prefix. xchg is + * expensive due to the implied lock prefix. The processor cannot prefetch + * cachelines if xchg is used. + */ +#define percpu_xchg_op(size, qual, _var, _nval) \ +({ \ + __pcpu_type_##size pxo_old__; \ + __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \ + asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \ + "%[oval]") \ + "\n1:\t" \ + __pcpu_op2_##size("cmpxchg", "%[nval]", \ + __percpu_arg([var])) \ + "\n\tjnz 1b" \ + : [oval] "=&a" (pxo_old__), \ + [var] "+m" (_var) \ + : [nval] __pcpu_reg_##size(, pxo_new__) \ + : "memory"); \ + (typeof(_var))(unsigned long) pxo_old__; \ +}) + +/* + * cmpxchg has no such implied lock semantics as a result it is much + * more efficient for cpu local operations. + */ +#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \ +({ \ + __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \ + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ + asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \ + __percpu_arg([var])) \ + : [oval] "+a" (pco_old__), \ + [var] "+m" (_var) \ + : [nval] __pcpu_reg_##size(, pco_new__) \ + : "memory"); \ + (typeof(_var))(unsigned long) pco_old__; \ +}) + +/* + * this_cpu_read() makes gcc load the percpu variable every time it is + * accessed while this_cpu_read_stable() allows the value to be cached. + * this_cpu_read_stable() is more efficient and can be used if its value + * is guaranteed to be valid across cpus. The current users include + * get_current() and get_thread_info() both of which are actually + * per-thread variables implemented as per-cpu variables and thus + * stable for the duration of the respective task. + */ +#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp) +#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp) +#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp) +#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp) +#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp) + +#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp) +#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp) +#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp) + +#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val) +#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val) +#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val) +#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_to_op(1, , "and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_to_op(2, , "and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_to_op(4, , "and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val) + +/* + * raw_cpu_xchg() can use a load-store since it is not required to be + * IRQ-safe. + */ +#define raw_percpu_xchg_op(var, nval) \ +({ \ + typeof(var) pxo_ret__ = raw_cpu_read(var); \ + raw_cpu_write(var, (nval)); \ + pxo_ret__; \ +}) + +#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) + +#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp) +#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp) +#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp) +#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op(1, volatile, "and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op(2, volatile, "and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op(4, volatile, "and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val) +#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval) +#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval) +#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval) + +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval) + +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val) +#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval) + +#ifdef CONFIG_X86_CMPXCHG64 +#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ + asm volatile("cmpxchg8b "__percpu_arg(1) \ + CC_SET(z) \ + : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ + : "b" (__n1), "c" (__n2)); \ + __ret; \ +}) + +#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#endif /* CONFIG_X86_CMPXCHG64 */ + +/* + * Per cpu atomic 64 bit operations are only available under 64 bit. + * 32 bit must fall back to generic operations. + */ +#ifdef CONFIG_X86_64 +#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp) +#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val) +#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val) +#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval) + +#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp) +#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) + +/* + * Pretty complex macro to generate cmpxchg16 instruction. The instruction + * is not supported on early AMD64 processors so we must be able to emulate + * it in software. The address used in the cmpxchg16 instruction must be + * aligned to a 16 byte boundary. + */ +#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ + alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ + "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ + X86_FEATURE_CX16, \ + ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ + "+m" (pcp2), "+d" (__o2)), \ + "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ + __ret; \ +}) + +#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double + +#endif + +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, + const unsigned long __percpu *addr) +{ + unsigned long __percpu *a = + (unsigned long __percpu *)addr + nr / BITS_PER_LONG; + +#ifdef CONFIG_X86_64 + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; +#else + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; +#endif +} + +static inline bool x86_this_cpu_variable_test_bit(int nr, + const unsigned long __percpu *addr) +{ + bool oldbit; + + asm volatile("btl "__percpu_arg(2)",%1" + CC_SET(c) + : CC_OUT(c) (oldbit) + : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); + + return oldbit; +} + +#define x86_this_cpu_test_bit(nr, addr) \ + (__builtin_constant_p((nr)) \ + ? x86_this_cpu_constant_test_bit((nr), (addr)) \ + : x86_this_cpu_variable_test_bit((nr), (addr))) + + +#include + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_SMP + +/* + * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu + * variables that are initialized and accessed before there are per_cpu + * areas allocated. + */ + +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map + +#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ + DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) +#define early_per_cpu(_name, _cpu) \ + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) + +#else /* !CONFIG_SMP */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue + +#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ + DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name) + +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name) + +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL +/* no early_per_cpu_map() */ + +#endif /* !CONFIG_SMP */ + +#endif /* _ASM_X86_PERCPU_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h new file mode 100644 index 000000000..4d810b947 --- /dev/null +++ b/arch/x86/include/asm/perf_event.h @@ -0,0 +1,597 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PERF_EVENT_H +#define _ASM_X86_PERF_EVENT_H + +#include + +/* + * Performance event hw details: + */ + +#define INTEL_PMC_MAX_GENERIC 32 +#define INTEL_PMC_MAX_FIXED 16 +#define INTEL_PMC_IDX_FIXED 32 + +#define X86_PMC_IDX_MAX 64 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL +#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL +#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) +#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) +#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19) +#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) +#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) +#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) +#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL + +#define HSW_IN_TX (1ULL << 32) +#define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) + +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) +#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) + +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ + (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) + +#define AMD64_EVENTSEL_EVENT \ + (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) +#define INTEL_ARCH_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) + +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + (0xFULL << AMD64_L3_SLICE_SHIFT) +#define AMD64_L3_SLICEID_MASK \ + (0x7ULL << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + (0xFFULL << AMD64_L3_THREAD_SHIFT) +#define AMD64_L3_F19H_THREAD_MASK \ + (0x3ULL << AMD64_L3_THREAD_SHIFT) + +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) + +#define AMD64_L3_COREID_SHIFT 42 +#define AMD64_L3_COREID_MASK \ + (0x7ULL << AMD64_L3_COREID_SHIFT) + +#define X86_RAW_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK | \ + ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS \ + (ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK | \ + ARCH_PERFMON_EVENTSEL_ANY | \ + ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \ + HSW_IN_TX | \ + HSW_IN_TX_CHECKPOINTED) +#define AMD64_RAW_EVENT_MASK \ + (X86_RAW_EVENT_MASK | \ + AMD64_EVENTSEL_EVENT) +#define AMD64_RAW_EVENT_MASK_NB \ + (AMD64_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK) + +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \ + (AMD64_EVENTSEL_EVENT | \ + GENMASK_ULL(37, 36)) + +#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \ + (ARCH_PERFMON_EVENTSEL_UMASK | \ + GENMASK_ULL(27, 24)) + +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \ + AMD64_PERFMON_V2_EVENTSEL_UMASK_NB) + +#define AMD64_NUM_COUNTERS 4 +#define AMD64_NUM_COUNTERS_CORE 6 +#define AMD64_NUM_COUNTERS_NB 4 + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_EVENTS_COUNT 7 + +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_LBR_SHIFT 24 + +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +}; + +union cpuid10_edx { + struct { + unsigned int num_counters_fixed:5; + unsigned int bit_width_fixed:8; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; + } split; + unsigned int full; +}; + +/* + * Intel Architectural LBR CPUID detection/enumeration details: + */ +union cpuid28_eax { + struct { + /* Supported LBR depth values */ + unsigned int lbr_depth_mask:8; + unsigned int reserved:22; + /* Deep C-state Reset */ + unsigned int lbr_deep_c_reset:1; + /* IP values contain LIP */ + unsigned int lbr_lip:1; + } split; + unsigned int full; +}; + +union cpuid28_ebx { + struct { + /* CPL Filtering Supported */ + unsigned int lbr_cpl:1; + /* Branch Filtering Supported */ + unsigned int lbr_filter:1; + /* Call-stack Mode Supported */ + unsigned int lbr_call_stack:1; + } split; + unsigned int full; +}; + +union cpuid28_ecx { + struct { + /* Mispredict Bit Supported */ + unsigned int lbr_mispred:1; + /* Timed LBRs Supported */ + unsigned int lbr_timed_lbr:1; + /* Branch Type Field Supported */ + unsigned int lbr_br_type:1; + } split; + unsigned int full; +}; + +/* + * AMD "Extended Performance Monitoring and Debug" CPUID + * detection/enumeration details: + */ +union cpuid_0x80000022_ebx { + struct { + /* Number of Core Performance Counters */ + unsigned int num_core_pmc:4; + /* Number of available LBR Stack Entries */ + unsigned int lbr_v2_stack_sz:6; + /* Number of Data Fabric Counters */ + unsigned int num_df_pmc:6; + } split; + unsigned int full; +}; + +struct x86_pmu_capability { + int version; + int num_counters_gp; + int num_counters_fixed; + int bit_width_gp; + int bit_width_fixed; + unsigned int events_mask; + int events_mask_len; + unsigned int pebs_ept :1; +}; + +/* + * Fixed-purpose performance events: + */ + +/* RDPMC offset for Fixed PMCs */ +#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30) +#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29) + +/* + * All the fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * There is no event-code assigned to the fixed-mode PMCs. + * + * For a fixed-mode PMC, which has an equivalent event on a general-purpose + * PMC, the event-code of the equivalent event is used for the fixed-mode PMC, + * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core. + * + * For a fixed-mode PMC, which doesn't have an equivalent event, a + * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS. + * The pseudo event-code for a fixed-mode PMC must be 0x00. + * The pseudo umask-code is 0xX. The X equals the index of the fixed + * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300. + * + * The counts are available in separate MSRs: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0) + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) + +/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) +#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) + +/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */ +#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c +#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) +#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) + +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose the value 47 for the fixed index of BTS, since lower + * values are used by actual fixed events and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15) + +/* + * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for + * each TopDown metric event. + * + * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS). + */ +#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0) +#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1) +#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2) +#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3) +#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4) +#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5) +#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6) +#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7) +#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND +#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \ + INTEL_PMC_MSK_FIXED_SLOTS) + +/* + * There is no event-code assigned to the TopDown events. + * + * For the slots event, use the pseudo code of the fixed counter 3. + * + * For the metric events, the pseudo event-code is 0x00. + * The pseudo umask-code starts from the middle of the pseudo event + * space, 0x80. + */ +#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */ +/* Level 1 metrics */ +#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */ +#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */ +#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */ +#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */ +/* Level 2 metrics */ +#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */ +#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */ +#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */ +#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */ + +#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND +#define INTEL_TD_METRIC_NUM 8 + +static inline bool is_metric_idx(int idx) +{ + return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; +} + +static inline bool is_topdown_idx(int idx) +{ + return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS; +} + +#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \ + (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN) + +#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) +#define GLOBAL_STATUS_BUFFER_OVF_BIT 62 +#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT) +#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) +#define GLOBAL_STATUS_ASIF BIT_ULL(60) +#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) +#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) +#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 + +#define GLOBAL_CTRL_EN_PERF_METRICS 48 +/* + * We model guest LBR event tracing as another fixed-mode PMC like BTS. + * + * We choose bit 58 because it's used to indicate LBR stack frozen state + * for architectural perfmon v4, also we unconditionally mask that bit in + * the handle_pmi_common(), so it'll never be set in the overflow handling. + * + * With this fake counter assigned, the guest LBR event user (such as KVM), + * can program the LBR registers on its own, and we don't actually do anything + * with then in the host context. + */ +#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT) + +/* + * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b, + * since it would claim bit 58 which is effectively Fixed26. + */ +#define INTEL_FIXED_VLBR_EVENT 0x1b00 + +/* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_size; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + u64 latency; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +/* + * AMD Extended Performance Monitoring and Debug cpuid feature detection + */ +#define EXT_PERFMON_DEBUG_FEATURES 0x80000022 + +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) +#define IBS_CAPS_RDWROPCNT (1U<<3) +#define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) +#define IBS_CAPS_RIPINVALIDCHK (1U<<7) +#define IBS_CAPS_OPBRNFUSE (1U<<8) +#define IBS_CAPS_FETCHCTLEXTD (1U<<9) +#define IBS_CAPS_OPDATA4 (1U<<10) +#define IBS_CAPS_ZEN4 (1U<<11) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) + +/* + * IBS APIC setup + */ +#define IBSCTL 0x1cc +#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) +#define IBSCTL_LVT_OFFSET_MASK 0x0F + +/* IBS fetch bits/masks */ +#define IBS_FETCH_L3MISSONLY (1ULL<<59) +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT 0xFFFF0000ULL +#define IBS_FETCH_MAX_CNT 0x0000FFFFULL + +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_L3MISSONLY (1ULL<<16) +#define IBS_OP_MAX_CNT 0x0000FFFFULL +#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ +#define IBS_RIP_INVALID (1ULL<<38) + +#ifdef CONFIG_X86_LOCAL_APIC +extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); +#else +static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } +#endif + +#ifdef CONFIG_PERF_EVENTS +extern void perf_events_lapic_init(void); + +/* + * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise + * unused and ABI specified to be 0, so nobody should care what we do with + * them. + * + * EXACT - the IP points to the exact instruction that triggered the + * event (HW bugs exempt). + * VM - original X86_VM_MASK; see set_linear_ip(). + */ +#define PERF_EFLAGS_EXACT (1UL << 3) +#define PERF_EFLAGS_VM (1UL << 5) + +struct pt_regs; +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#include + +/* + * We abuse bit 3 from flags to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ip = (__ip); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ + (regs)->cs = __KERNEL_CS; \ + regs->flags = 0; \ +} + +struct perf_guest_switch_msr { + unsigned msr; + u64 host, guest; +}; + +struct x86_pmu_lbr { + unsigned int nr; + unsigned int from; + unsigned int to; + unsigned int info; +}; + +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); +extern u64 perf_get_hw_event_config(int hw_event); +extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); +extern int x86_perf_rdpmc_index(struct perf_event *event); +#else +static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + memset(cap, 0, sizeof(*cap)); +} + +static inline u64 perf_get_hw_event_config(int hw_event) +{ + return 0; +} + +static inline void perf_events_lapic_init(void) { } +static inline void perf_check_microcode(void) { } +#endif + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr); +#else +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +{ + memset(lbr, 0, sizeof(*lbr)); +} +#endif + +#ifdef CONFIG_CPU_SUP_INTEL + extern void intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} +#endif + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) + extern void amd_pmu_enable_virt(void); + extern void amd_pmu_disable_virt(void); + +#if defined(CONFIG_PERF_EVENTS_AMD_BRS) + +#define PERF_NEEDS_LOPWR_CB 1 + +/* + * architectural low power callback impacts + * drivers/acpi/processor_idle.c + * drivers/acpi/acpi_pad.c + */ +extern void perf_amd_brs_lopwr_cb(bool lopwr_in); + +DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); + +static inline void perf_lopwr_cb(bool lopwr_in) +{ + static_call_mod(perf_lopwr_cb)(lopwr_in); +} + +#endif /* PERF_NEEDS_LOPWR_CB */ + +#else + static inline void amd_pmu_enable_virt(void) { } + static inline void amd_pmu_disable_virt(void) { } +#endif + +#define arch_perf_out_copy_user copy_from_user_nmi + +#endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000..94de1a05a --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h @@ -0,0 +1,877 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Netburst Performance Events (P4, old Xeon) + */ + +#ifndef PERF_EVENT_P4_H +#define PERF_EVENT_P4_H + +#include +#include + +/* + * NetBurst has performance MSRs shared between + * threads if HT is turned on, ie for both logical + * processors (mem: in turn in Atom with HT support + * perf-MSRs are not shared and every thread has its + * own perf-MSRs set) + */ +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) + +#define ARCH_P4_CNTRVAL_BITS (40) +#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) +#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1)) + +#define P4_ESCR_EVENT_MASK 0x7e000000ULL +#define P4_ESCR_EVENT_SHIFT 25 +#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL +#define P4_ESCR_EVENTMASK_SHIFT 9 +#define P4_ESCR_TAG_MASK 0x000001e0ULL +#define P4_ESCR_TAG_SHIFT 5 +#define P4_ESCR_TAG_ENABLE 0x00000010ULL +#define P4_ESCR_T0_OS 0x00000008ULL +#define P4_ESCR_T0_USR 0x00000004ULL +#define P4_ESCR_T1_OS 0x00000002ULL +#define P4_ESCR_T1_USR 0x00000001ULL + +#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) +#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) + +#define P4_CCCR_OVF 0x80000000ULL +#define P4_CCCR_CASCADE 0x40000000ULL +#define P4_CCCR_OVF_PMI_T0 0x04000000ULL +#define P4_CCCR_OVF_PMI_T1 0x08000000ULL +#define P4_CCCR_FORCE_OVF 0x02000000ULL +#define P4_CCCR_EDGE 0x01000000ULL +#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL +#define P4_CCCR_THRESHOLD_SHIFT 20 +#define P4_CCCR_COMPLEMENT 0x00080000ULL +#define P4_CCCR_COMPARE 0x00040000ULL +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL +#define P4_CCCR_ESCR_SELECT_SHIFT 13 +#define P4_CCCR_ENABLE 0x00001000ULL +#define P4_CCCR_THREAD_SINGLE 0x00010000ULL +#define P4_CCCR_THREAD_BOTH 0x00020000ULL +#define P4_CCCR_THREAD_ANY 0x00030000ULL +#define P4_CCCR_RESERVED 0x00000fffULL + +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) + +#define P4_GEN_ESCR_EMASK(class, name, bit) \ + class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_EMASK_BIT(class, name) class##__##name + +/* + * config field is 64bit width and consists of + * HT << 63 | ESCR << 32 | CCCR + * where HT is HyperThreading bit (since ESCR + * has it reserved we may use it for own purpose) + * + * note that this is NOT the addresses of respective + * ESCR and CCCR but rather an only packed value should + * be unpacked and written to a proper addresses + * + * the base idea is to pack as much info as possible + */ +#define p4_config_pack_escr(v) (((u64)(v)) << 32) +#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) + +#define p4_config_unpack_emask(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t = t & P4_ESCR_EVENTMASK_MASK; \ + t = t >> P4_ESCR_EVENTMASK_SHIFT; \ + t; \ + }) + +#define p4_config_unpack_event(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t = t & P4_ESCR_EVENT_MASK; \ + t = t >> P4_ESCR_EVENT_SHIFT; \ + t; \ + }) + +#define P4_CONFIG_HT_SHIFT 63 +#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) + +/* + * If an event has alias it should be marked + * with a special bit. (Don't forget to check + * P4_PEBS_CONFIG_MASK and related bits on + * modification.) + */ +#define P4_CONFIG_ALIASABLE (1ULL << 9) + +/* + * The bits we allow to pass for RAW events + */ +#define P4_CONFIG_MASK_ESCR \ + P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE + +#define P4_CONFIG_MASK_CCCR \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_RESERVED + +/* some dangerous bits are reserved for kernel internals */ +#define P4_CONFIG_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ + (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) + +/* + * In case of event aliasing we need to preserve some + * caller bits, otherwise the mapping won't be complete. + */ +#define P4_CONFIG_EVENT_ALIAS_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \ + p4_config_pack_cccr(P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE)) + +#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \ + ((P4_CONFIG_HT) | \ + p4_config_pack_escr(P4_ESCR_T0_OS | \ + P4_ESCR_T0_USR | \ + P4_ESCR_T1_OS | \ + P4_ESCR_T1_USR) | \ + p4_config_pack_cccr(P4_CCCR_OVF | \ + P4_CCCR_CASCADE | \ + P4_CCCR_FORCE_OVF | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_OVF_PMI_T0 | \ + P4_CCCR_OVF_PMI_T1 | \ + P4_CONFIG_ALIASABLE)) + +static inline bool p4_is_event_cascaded(u64 config) +{ + u32 cccr = p4_config_unpack_cccr(config); + return !!(cccr & P4_CCCR_CASCADE); +} + +static inline int p4_ht_config_thread(u64 config) +{ + return !!(config & P4_CONFIG_HT); +} + +static inline u64 p4_set_ht_bit(u64 config) +{ + return config | P4_CONFIG_HT; +} + +static inline u64 p4_clear_ht_bit(u64 config) +{ + return config & ~P4_CONFIG_HT; +} + +static inline int p4_ht_active(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings > 1; +#endif + return 0; +} + +static inline int p4_ht_thread(int cpu) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); +#endif + return 0; +} + +static inline int p4_should_swap_ts(u64 config, int cpu) +{ + return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); +} + +static inline u32 p4_default_cccr_conf(int cpu) +{ + /* + * Note that P4_CCCR_THREAD_ANY is "required" on + * non-HT machines (on HT machines we count TS events + * regardless the state of second logical processor + */ + u32 cccr = P4_CCCR_THREAD_ANY; + + if (!p4_ht_thread(cpu)) + cccr |= P4_CCCR_OVF_PMI_T0; + else + cccr |= P4_CCCR_OVF_PMI_T1; + + return cccr; +} + +static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) +{ + u32 escr = 0; + + if (!p4_ht_thread(cpu)) { + if (!exclude_os) + escr |= P4_ESCR_T0_OS; + if (!exclude_usr) + escr |= P4_ESCR_T0_USR; + } else { + if (!exclude_os) + escr |= P4_ESCR_T1_OS; + if (!exclude_usr) + escr |= P4_ESCR_T1_USR; + } + + return escr; +} + +/* + * This are the events which should be used in "Event Select" + * field of ESCR register, they are like unique keys which allow + * the kernel to determinate which CCCR and COUNTER should be + * used to track an event + */ +enum P4_EVENTS { + P4_EVENT_TC_DELIVER_MODE, + P4_EVENT_BPU_FETCH_REQUEST, + P4_EVENT_ITLB_REFERENCE, + P4_EVENT_MEMORY_CANCEL, + P4_EVENT_MEMORY_COMPLETE, + P4_EVENT_LOAD_PORT_REPLAY, + P4_EVENT_STORE_PORT_REPLAY, + P4_EVENT_MOB_LOAD_REPLAY, + P4_EVENT_PAGE_WALK_TYPE, + P4_EVENT_BSQ_CACHE_REFERENCE, + P4_EVENT_IOQ_ALLOCATION, + P4_EVENT_IOQ_ACTIVE_ENTRIES, + P4_EVENT_FSB_DATA_ACTIVITY, + P4_EVENT_BSQ_ALLOCATION, + P4_EVENT_BSQ_ACTIVE_ENTRIES, + P4_EVENT_SSE_INPUT_ASSIST, + P4_EVENT_PACKED_SP_UOP, + P4_EVENT_PACKED_DP_UOP, + P4_EVENT_SCALAR_SP_UOP, + P4_EVENT_SCALAR_DP_UOP, + P4_EVENT_64BIT_MMX_UOP, + P4_EVENT_128BIT_MMX_UOP, + P4_EVENT_X87_FP_UOP, + P4_EVENT_TC_MISC, + P4_EVENT_GLOBAL_POWER_EVENTS, + P4_EVENT_TC_MS_XFER, + P4_EVENT_UOP_QUEUE_WRITES, + P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, + P4_EVENT_RETIRED_BRANCH_TYPE, + P4_EVENT_RESOURCE_STALL, + P4_EVENT_WC_BUFFER, + P4_EVENT_B2B_CYCLES, + P4_EVENT_BNR, + P4_EVENT_SNOOP, + P4_EVENT_RESPONSE, + P4_EVENT_FRONT_END_EVENT, + P4_EVENT_EXECUTION_EVENT, + P4_EVENT_REPLAY_EVENT, + P4_EVENT_INSTR_RETIRED, + P4_EVENT_UOPS_RETIRED, + P4_EVENT_UOP_TYPE, + P4_EVENT_BRANCH_RETIRED, + P4_EVENT_MISPRED_BRANCH_RETIRED, + P4_EVENT_X87_ASSIST, + P4_EVENT_MACHINE_CLEAR, + P4_EVENT_INSTR_COMPLETED, +}; + +#define P4_OPCODE(event) event##_OPCODE +#define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) +#define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) +#define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) + +/* + * Comments below the event represent ESCR restriction + * for this event and counter index per ESCR + * + * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early + * processor builds (family 0FH, models 01H-02H). These MSRs + * are not available on later versions, so that we don't use + * them completely + * + * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly + * working so that we should not use this CCCR and respective + * counter as result + */ +enum P4_EVENT_OPCODES { + P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), + /* + * MSR_P4_BPU_ESCR0: 0, 1 + * MSR_P4_BPU_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), + /* + * MSR_P4_ITLB_ESCR0: 0, 1 + * MSR_P4_ITLB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), + /* + * MSR_P4_MOB_ESCR0: 0, 1 + * MSR_P4_MOB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), + /* + * MSR_P4_PMH_ESCR0: 0, 1 + * MSR_P4_PMH_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), + /* + * MSR_P4_BSU_ESCR0: 0, 1 + * MSR_P4_BSU_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), + /* + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), + /* + * MSR_P4_BSU_ESCR0: 0, 1 + */ + + P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), + /* + * NOTE: no ESCR name in docs, it's guessed + * MSR_P4_BSU_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR1: 6, 7 + */ + + P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), + /* + * MSR_P4_ALF_ESCR0: 12, 13, 16 + * MSR_P4_ALF_ESCR1: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + + P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + + P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), + /* + * MSR_P4_RAT_ESCR0: 12, 13, 16 + * MSR_P4_RAT_ESCR1: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + + P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ +}; + +/* + * a caller should use P4_ESCR_EMASK_NAME helper to + * pick the EventMask needed, for example + * + * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) + */ +enum P4_ESCR_EMASKS { + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), + + P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), + + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), +}; + +/* + * Note we have UOP and PEBS bits reserved for now + * just in case if we will need them once + */ +#define P4_PEBS_CONFIG_ENABLE (1ULL << 7) +#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8) +#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL +#define P4_PEBS_CONFIG_MASK 0xFFLL + +/* + * mem: Only counters MSR_IQ_COUNTER4 (16) and + * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling + */ +#define P4_PEBS_ENABLE 0x02000000ULL +#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL + +#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) +#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) + +#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask)) + +enum P4_PEBS_METRIC { + P4_PEBS_METRIC__none, + + P4_PEBS_METRIC__1stl_cache_load_miss_retired, + P4_PEBS_METRIC__2ndl_cache_load_miss_retired, + P4_PEBS_METRIC__dtlb_load_miss_retired, + P4_PEBS_METRIC__dtlb_store_miss_retired, + P4_PEBS_METRIC__dtlb_all_miss_retired, + P4_PEBS_METRIC__tagged_mispred_branch, + P4_PEBS_METRIC__mob_load_replay_retired, + P4_PEBS_METRIC__split_load_retired, + P4_PEBS_METRIC__split_store_retired, + + P4_PEBS_METRIC__max +}; + +/* + * Notes on internal configuration of ESCR+CCCR tuples + * + * Since P4 has quite the different architecture of + * performance registers in compare with "architectural" + * once and we have on 64 bits to keep configuration + * of performance event, the following trick is used. + * + * 1) Since both ESCR and CCCR registers have only low + * 32 bits valuable, we pack them into a single 64 bit + * configuration. Low 32 bits of such config correspond + * to low 32 bits of CCCR register and high 32 bits + * correspond to low 32 bits of ESCR register. + * + * 2) The meaning of every bit of such config field can + * be found in Intel SDM but it should be noted that + * we "borrow" some reserved bits for own usage and + * clean them or set to a proper value when we do + * a real write to hardware registers. + * + * 3) The format of bits of config is the following + * and should be either 0 or set to some predefined + * values: + * + * Low 32 bits + * ----------- + * 0-6: P4_PEBS_METRIC enum + * 7-11: reserved + * 12: reserved (Enable) + * 13-15: reserved (ESCR select) + * 16-17: Active Thread + * 18: Compare + * 19: Complement + * 20-23: Threshold + * 24: Edge + * 25: reserved (FORCE_OVF) + * 26: reserved (OVF_PMI_T0) + * 27: reserved (OVF_PMI_T1) + * 28-29: reserved + * 30: reserved (Cascade) + * 31: reserved (OVF) + * + * High 32 bits + * ------------ + * 0: reserved (T1_USR) + * 1: reserved (T1_OS) + * 2: reserved (T0_USR) + * 3: reserved (T0_OS) + * 4: Tag Enable + * 5-8: Tag Value + * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) + * 25-30: enum P4_EVENTS + * 31: reserved (HT thread) + */ + +#endif /* PERF_EVENT_P4_H */ + diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h new file mode 100644 index 000000000..c7ec5bb88 --- /dev/null +++ b/arch/x86/include/asm/pgalloc.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGALLOC_H +#define _ASM_X86_PGALLOC_H + +#include +#include /* for struct page */ +#include + +#define __HAVE_ARCH_PTE_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE +#include + +static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, + unsigned long start, unsigned long count) {} +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_release_pte(unsigned long pfn) {} +static inline void paravirt_release_pmd(unsigned long pfn) {} +static inline void paravirt_release_pud(unsigned long pfn) {} +static inline void paravirt_release_p4d(unsigned long pfn) {} +#endif + +/* + * Flags to use when allocating a user page table page. + */ +extern gfp_t __userpte_alloc_gfp; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + +/* + * Allocate and free page tables. + */ +extern pgd_t *pgd_alloc(struct mm_struct *); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +extern pgtable_t pte_alloc_one(struct mm_struct *); + +extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); + +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address) +{ + ___pte_free_tlb(tlb, pte); +} + +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + +static inline void pmd_populate_kernel_safe(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd_safe(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + struct page *pte) +{ + unsigned long pfn = page_to_pfn(pte); + + paravirt_alloc_pte(mm, pfn); + set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); +} + +#if CONFIG_PGTABLE_LEVELS > 2 +extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) +{ + ___pmd_free_tlb(tlb, pmd); +} + +#ifdef CONFIG_X86_PAE +extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); +#else /* !CONFIG_X86_PAE */ +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} + +static inline void pud_populate_safe(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + set_pud_safe(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} +#endif /* CONFIG_X86_PAE */ + +#if CONFIG_PGTABLE_LEVELS > 3 +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); +} + +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); +} + +extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); + +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) +{ + ___pud_free_tlb(tlb, pud); +} + +#if CONFIG_PGTABLE_LEVELS > 4 +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); +} + +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); + set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); +} + +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + gfp_t gfp = GFP_KERNEL_ACCOUNT; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; + return (p4d_t *)get_zeroed_page(gfp); +} + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + free_page((unsigned long)p4d); +} + +extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); + +static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, + unsigned long address) +{ + if (pgtable_l5_enabled()) + ___p4d_free_tlb(tlb, p4d); +} + +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ + +#endif /* _ASM_X86_PGALLOC_H */ diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h new file mode 100644 index 000000000..60d0f9015 --- /dev/null +++ b/arch/x86/include/asm/pgtable-2level.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_2LEVEL_H +#define _ASM_X86_PGTABLE_2LEVEL_H + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void native_set_pte(pte_t *ptep , pte_t pte) +{ + *ptep = pte; +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline void native_pmd_clear(pmd_t *pmdp) +{ + native_set_pmd(pmdp, __pmd(0)); +} + +static inline void native_pud_clear(pud_t *pudp) +{ +} + +static inline void native_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *xp) +{ + *xp = native_make_pte(0); +} + +#ifdef CONFIG_SMP +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ + return __pte(xchg(&xp->pte_low, 0)); +} +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif + +#ifdef CONFIG_SMP +static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) +{ + return __pmd(xchg((pmdval_t *)xp, 0)); +} +#else +#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#endif + +#ifdef CONFIG_SMP +static inline pud_t native_pudp_get_and_clear(pud_t *xp) +{ + return __pud(xchg((pudval_t *)xp, 0)); +} +#else +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) +#endif + +/* Bit manipulation helper on pte/pgoff entry */ +static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, + unsigned long mask, unsigned int leftshift) +{ + return ((value >> rightshift) & mask) << leftshift; +} + +/* Encode and de-code a swap entry */ +#define SWP_TYPE_BITS 5 +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << (_PAGE_BIT_PRESENT + 1)) \ + | ((offset) << SWP_OFFSET_SHIFT) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) + +/* No inverted PFNs on 2 level page tables */ + +static inline u64 protnone_mask(u64 val) +{ + return 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + return val; +} + +static inline bool __pte_needs_invert(u64 val) +{ + return false; +} + +#endif /* _ASM_X86_PGTABLE_2LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h new file mode 100644 index 000000000..7f6ccff0b --- /dev/null +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H +#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H + +#ifndef __ASSEMBLY__ +#include + +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long p4dval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef union { + pteval_t pte; + pteval_t pte_low; +} pte_t; +#endif /* !__ASSEMBLY__ */ + +#define SHARED_KERNEL_PMD 0 + +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. + */ + +#define PTRS_PER_PTE 1024 + +/* This covers all VMSPLIT_* and VMSPLIT_*_OPT variants */ +#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT) + +#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h new file mode 100644 index 000000000..28421a887 --- /dev/null +++ b/arch/x86/include/asm/pgtable-3level.h @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_3LEVEL_H +#define _ASM_X86_PGTABLE_3LEVEL_H + +#include + +/* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \ + __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016Lx)\n", \ + __FILE__, __LINE__, &(e), pmd_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. -ben + */ +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +#define pmd_read_atomic pmd_read_atomic +/* + * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by GCC. Problem is, in certain places + * where pte_offset_map_lock() is called, concurrent page faults are + * allowed, if the mmap_lock is hold for reading. An example is mincore + * vs page faults vs MADV_DONTNEED. On the page fault side + * pmd_populate() rightfully does a set_64bit(), but if we're reading the + * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen + * because GCC will not read the 64-bit value of the pmd atomically. + * + * To fix this all places running pte_offset_map_lock() while holding the + * mmap_lock in read mode, shall read the pmdp pointer using this + * function to know if the pmd is null or not, and in turn to know if + * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd + * operations. + * + * Without THP if the mmap_lock is held for reading, the pmd can only + * transition from null to not null while pmd_read_atomic() runs. So + * we can always return atomic pmd values with this function. + * + * With THP if the mmap_lock is held for reading, the pmd can become + * trans_huge or none or point to a pte (and in turn become "stable") + * at any time under pmd_read_atomic(). We could read it truly + * atomically here with an atomic64_read() for the THP enabled case (and + * it would be a whole lot simpler), but to avoid using cmpxchg8b we + * only return an atomic pmdval if the low part of the pmdval is later + * found to be stable (i.e. pointing to a pte). We are also returning a + * 'none' (zero) pmdval if the low part of the pmd is zero. + * + * In some cases the high and low part of the pmdval returned may not be + * consistent if THP is enabled (the low part may point to previously + * mapped hugepage, while the high part may point to a more recently + * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only + * needs the low part of the pmd to be read atomically to decide if the + * pmd is unstable or not, with the only exception when the low part + * of the pmd is zero, in which case we return a 'none' pmd. + */ +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + pmdval_t ret; + u32 *tmp = (u32 *)pmdp; + + ret = (pmdval_t) (*tmp); + if (ret) { + /* + * If the low part is null, we must not read the high part + * or we can end up with a partial pmd. + */ + smp_rmb(); + ret |= ((pmdval_t)*(tmp + 1)) << 32; + } + + return (pmd_t) { ret }; +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); +} + +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); +#endif + set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); +} + +/* + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table + * entry, so clear the bottom half first and enforce ordering with a compiler + * barrier. + */ +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + u32 *tmp = (u32 *)pmd; + *tmp = 0; + smp_wmb(); + *(tmp + 1) = 0; +} + +static inline void native_pud_clear(pud_t *pudp) +{ +} + +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud(0)); + + /* + * According to Intel App note "TLBs, Paging-Structure Caches, + * and Their Invalidation", April 2007, document 317080-001, + * section 8.1: in PAE mode we explicitly have to flush the + * TLB via cr3 if the top-level pgd is changed... + * + * Currently all places where pud_clear() is called either have + * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or + * pud_clear_bad()), so we don't need TLB flush here. + */ +} + +#ifdef CONFIG_SMP +static inline pte_t native_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); + + return res; +} +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif + +union split_pmd { + struct { + u32 pmd_low; + u32 pmd_high; + }; + pmd_t pmd; +}; + +#ifdef CONFIG_SMP +static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) +{ + union split_pmd res, *orig = (union split_pmd *)pmdp; + + /* xchg acts as a barrier before setting of the high bits */ + res.pmd_low = xchg(&orig->pmd_low, 0); + res.pmd_high = orig->pmd_high; + orig->pmd_high = 0; + + return res.pmd; +} +#else +#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#endif + +#ifndef pmdp_establish +#define pmdp_establish pmdp_establish +static inline pmd_t pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) +{ + pmd_t old; + + /* + * If pmd has present bit cleared we can get away without expensive + * cmpxchg64: we can update pmdp half-by-half without racing with + * anybody. + */ + if (!(pmd_val(pmd) & _PAGE_PRESENT)) { + union split_pmd old, new, *ptr; + + ptr = (union split_pmd *)pmdp; + + new.pmd = pmd; + + /* xchg acts as a barrier before setting of the high bits */ + old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); + old.pmd_high = ptr->pmd_high; + ptr->pmd_high = new.pmd_high; + return old.pmd; + } + + do { + old = *pmdp; + } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + + return old; +} +#endif + +#ifdef CONFIG_SMP +union split_pud { + struct { + u32 pud_low; + u32 pud_high; + }; + pud_t pud; +}; + +static inline pud_t native_pudp_get_and_clear(pud_t *pudp) +{ + union split_pud res, *orig = (union split_pud *)pudp; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); +#endif + + /* xchg acts as a barrier before setting of the high bits */ + res.pud_low = xchg(&orig->pud_low, 0); + res.pud_high = orig->pud_high; + orig->pud_high = 0; + + return res.pud; +} +#else +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) +#endif + +/* Encode and de-code a swap entry */ +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS) +#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS}) + +/* + * Normally, __swp_entry() converts from arch-independent swp_entry_t to + * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result + * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the + * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to + * __swp_entry_to_pte() through the following helper macro based on 64bit + * __swp_entry(). + */ +#define __swp_pteval_entry(type, offset) ((pteval_t) { \ + (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) }) + +#define __swp_entry_to_pte(x) ((pte_t){ .pte = \ + __swp_pteval_entry(__swp_type(x), __swp_offset(x)) }) +/* + * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent + * swp_entry_t, but also has to convert it from 64bit to the 32bit + * intermediate representation, using the following macros based on 64bit + * __swp_type() and __swp_offset(). + */ +#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS))) +#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)) + +#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ + __pteval_swp_offset(pte))) + +#include + +#endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h new file mode 100644 index 000000000..56baf43be --- /dev/null +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H +#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H + +#ifndef __ASSEMBLY__ +#include + +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 p4dval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#endif /* !__ASSEMBLY__ */ + +#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) + +#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED) + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 30 +#define PTRS_PER_PGD 4 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#define MAX_POSSIBLE_PHYSMEM_BITS 36 +#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT) + +#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h new file mode 100644 index 000000000..a0c1525f1 --- /dev/null +++ b/arch/x86/include/asm/pgtable-invert.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PGTABLE_INVERT_H +#define _ASM_PGTABLE_INVERT_H 1 + +#ifndef __ASSEMBLY__ + +/* + * A clear pte value is special, and doesn't get inverted. + * + * Note that even users that only pass a pgprot_t (rather + * than a full pte) won't trigger the special zero case, + * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED + * set. So the all zero case really is limited to just the + * cleared page table entry case. + */ +static inline bool __pte_needs_invert(u64 val) +{ + return val && !(val & _PAGE_PRESENT); +} + +/* Get a mask to xor with the page table entry to get the correct pfn. */ +static inline u64 protnone_mask(u64 val) +{ + return __pte_needs_invert(val) ? ~0ull : 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + /* + * When a PTE transitions from NONE to !NONE or vice-versa + * invert the PFN part to stop speculation. + * pte_pfn undoes this when needed. + */ + if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) + val = (val & ~mask) | (~val & mask); + return val; +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h new file mode 100644 index 000000000..286a71810 --- /dev/null +++ b/arch/x86/include/asm/pgtable.h @@ -0,0 +1,1469 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_H +#define _ASM_X86_PGTABLE_H + +#include +#include +#include + +/* + * Macro to mark a page protection value as UC- + */ +#define pgprot_noncached(prot) \ + ((boot_cpu_data.x86 > 3) \ + ? (__pgprot(pgprot_val(prot) | \ + cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ + : (prot)) + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include +#include +#include + +extern pgd_t early_top_pgt[PTRS_PER_PGD]; +bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); + +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); +void ptdump_walk_pgd_level_checkwx(void); +void ptdump_walk_user_pgd_level_checkwx(void); + +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + +#ifdef CONFIG_DEBUG_WX +#define debug_checkwx() ptdump_walk_pgd_level_checkwx() +#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() +#else +#define debug_checkwx() do { } while (0) +#define debug_checkwx_user() do { } while (0) +#endif + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] + __visible; +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) + +extern spinlock_t pgd_lock; +extern struct list_head pgd_list; + +extern struct mm_struct *pgd_page_get_mm(struct page *page); + +extern pmdval_t early_pmd_flags; + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else /* !CONFIG_PARAVIRT_XXL */ +#define set_pte(ptep, pte) native_set_pte(ptep, pte) + +#define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + +#ifndef __PAGETABLE_P4D_FOLDED +#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0) +#endif + +#ifndef set_p4d +# define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d) +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +#define p4d_clear(p4d) native_p4d_clear(p4d) +#endif + +#ifndef set_pud +# define set_pud(pudp, pud) native_set_pud(pudp, pud) +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_clear(pud) native_pud_clear(pud) +#endif + +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) + +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) + +#ifndef __PAGETABLE_P4D_FOLDED +#define p4d_val(x) native_p4d_val(x) +#define __p4d(x) native_make_p4d(x) +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) + +#define arch_end_context_switch(prev) do {} while(0) +#endif /* CONFIG_PARAVIRT_XXL */ + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_DIRTY; +} + +static inline int pte_young(pte_t pte) +{ + return pte_flags(pte) & _PAGE_ACCESSED; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_DIRTY; +} + +#define pmd_young pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_ACCESSED; +} + +static inline int pud_dirty(pud_t pud) +{ + return pud_flags(pud) & _PAGE_DIRTY; +} + +static inline int pud_young(pud_t pud) +{ + return pud_flags(pud) & _PAGE_ACCESSED; +} + +static inline int pte_write(pte_t pte) +{ + return pte_flags(pte) & _PAGE_RW; +} + +static inline int pte_huge(pte_t pte) +{ + return pte_flags(pte) & _PAGE_PSE; +} + +static inline int pte_global(pte_t pte) +{ + return pte_flags(pte) & _PAGE_GLOBAL; +} + +static inline int pte_exec(pte_t pte) +{ + return !(pte_flags(pte) & _PAGE_NX); +} + +static inline int pte_special(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SPECIAL; +} + +/* Entries that were set to PROT_NONE are inverted */ + +static inline u64 protnone_mask(u64 val); + +static inline unsigned long pte_pfn(pte_t pte) +{ + phys_addr_t pfn = pte_val(pte); + pfn ^= protnone_mask(pfn); + return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + phys_addr_t pfn = pmd_val(pmd); + pfn ^= protnone_mask(pfn); + return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + phys_addr_t pfn = pud_val(pud); + pfn ^= protnone_mask(pfn); + return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; +} + +static inline unsigned long p4d_pfn(p4d_t p4d) +{ + return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; +} + +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +#define p4d_leaf p4d_large +static inline int p4d_large(p4d_t p4d) +{ + /* No 512 GiB pages yet */ + return 0; +} + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) + +#define pmd_leaf pmd_large +static inline int pmd_large(pmd_t pte) +{ + return pmd_flags(pte) & _PAGE_PSE; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ +static inline int pmd_trans_huge(pmd_t pmd) +{ + return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; +} + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline int pud_trans_huge(pud_t pud) +{ + return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; +} +#endif + +#define has_transparent_hugepage has_transparent_hugepage +static inline int has_transparent_hugepage(void) +{ + return boot_cpu_has(X86_FEATURE_PSE); +} + +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP +static inline int pmd_devmap(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_DEVMAP); +} + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline int pud_devmap(pud_t pud) +{ + return !!(pud_val(pud) & _PAGE_DEVMAP); +} +#else +static inline int pud_devmap(pud_t pud) +{ + return 0; +} +#endif + +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} +#endif +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pte_t pte_set_flags(pte_t pte, pteval_t set) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v | set); +} + +static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v & ~clear); +} + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pte_t pte_mkclean(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_ACCESSED); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_RW); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_NX); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_RW); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_PSE); +} + +static inline pte_t pte_clrhuge(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_PSE); +} + +static inline pte_t pte_mkglobal(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_GLOBAL); +} + +static inline pte_t pte_clrglobal(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_GLOBAL); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SPECIAL); +} + +static inline pte_t pte_mkdevmap(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); +} + +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v & ~clear); +} + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY); +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_RW); +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mkdevmap(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DEVMAP); +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_PSE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_RW); +} + +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v & ~clear); +} + +static inline pud_t pud_mkold(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_ACCESSED); +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_DIRTY); +} + +static inline pud_t pud_wrprotect(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_RW); +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pud_t pud_mkdevmap(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_DEVMAP); +} + +static inline pud_t pud_mkhuge(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_PSE); +} + +static inline pud_t pud_mkyoung(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_ACCESSED); +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_RW); +} + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SOFT_DIRTY; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; +} + +static inline int pud_soft_dirty(pud_t pud) +{ + return pud_flags(pud) & _PAGE_SOFT_DIRTY; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); +} + +static inline pud_t pud_mksoft_dirty(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); +} + +static inline pud_t pud_clear_soft_dirty(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); +} + +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + +static inline pgprotval_t check_pgprot(pgprot_t pgprot) +{ + pgprotval_t massaged_val = massage_pgprot(pgprot); + + /* mmdebug.h can not be included here because of dependencies */ +#ifdef CONFIG_DEBUG_VM + WARN_ONCE(pgprot_val(pgprot) != massaged_val, + "attempted to set unsupported pgprot: %016llx " + "bits: %016llx supported: %016llx\n", + (u64)pgprot_val(pgprot), + (u64)pgprot_val(pgprot) ^ massaged_val, + (u64)__supported_pte_mask); +#endif + + return massaged_val; +} + +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PTE_PFN_MASK; + return __pte(pfn | check_pgprot(pgprot)); +} + +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PMD_PAGE_MASK; + return __pmd(pfn | check_pgprot(pgprot)); +} + +static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) +{ + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PUD_PAGE_MASK; + return __pud(pfn | check_pgprot(pgprot)); +} + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + return pfn_pmd(pmd_pfn(pmd), + __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pteval_t val = pte_val(pte), oldval = val; + + /* + * Chop off the NX bit (if present), and add the NX portion of + * the newprot (if present): + */ + val &= _PAGE_CHG_MASK; + val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); + return __pte(val); +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmdval_t val = pmd_val(pmd), oldval = val; + + val &= _HPAGE_CHG_MASK; + val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); + return __pmd(val); +} + +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ +#define pgprot_modify pgprot_modify +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +{ + pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; + return __pgprot(preservebits | addbits); +} + +#define pte_pgprot(x) __pgprot(pte_flags(x)) +#define pmd_pgprot(x) __pgprot(pmd_flags(x)) +#define pud_pgprot(x) __pgprot(pud_flags(x)) +#define p4d_pgprot(x) __pgprot(p4d_flags(x)) + +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) + +static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, + enum page_cache_mode pcm, + enum page_cache_mode new_pcm) +{ + /* + * PAT type is always WB for untracked ranges, so no need to check. + */ + if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) + return 1; + + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + * - request is write-through, return cannot be write-back + * - request is write-through, return cannot be write-combine + */ + if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WC && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WC)) { + return 0; + } + + return 1; +} + +pmd_t *populate_extra_pmd(unsigned long vaddr); +pte_t *populate_extra_pte(unsigned long vaddr); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgtbl(pgdp, pgd); +} +#else /* CONFIG_PAGE_TABLE_ISOLATION */ +static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +#endif /* __ASSEMBLY__ */ + + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include + +static inline int pte_none(pte_t pte) +{ + return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return a.pte == b.pte; +} + +static inline int pte_present(pte_t a) +{ + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); +} + +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP +static inline int pte_devmap(pte_t a) +{ + return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; +} +#endif + +#define pte_accessible pte_accessible +static inline bool pte_accessible(struct mm_struct *mm, pte_t a) +{ + if (pte_flags(a) & _PAGE_PRESENT) + return true; + + if ((pte_flags(a) & _PAGE_PROTNONE) && + atomic_read(&mm->tlb_flush_pending)) + return true; + + return false; +} + +static inline int pmd_present(pmd_t pmd) +{ + /* + * Checking for _PAGE_PSE is needed too because + * split_huge_page will temporarily clear the present bit (but + * the _PAGE_PSE flag will remain set at all times while the + * _PAGE_PRESENT bit is clear). + */ + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); +} + +#ifdef CONFIG_NUMA_BALANCING +/* + * These work without NUMA balancing but the kernel does not care. See the + * comment in include/linux/pgtable.h + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT)) + == _PAGE_PROTNONE; +} + +static inline int pmd_protnone(pmd_t pmd) +{ + return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT)) + == _PAGE_PROTNONE; +} +#endif /* CONFIG_NUMA_BALANCING */ + +static inline int pmd_none(pmd_t pmd) +{ + /* Only check low word on 32-bit platforms, since it might be + out of sync with upper half. */ + unsigned long val = native_pmd_val(pmd); + return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; +} + +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * (Currently stuck as a macro because of indirect forward reference + * to linux/mm.h:page_to_nid()) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline int pmd_bad(pmd_t pmd) +{ + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); +} + +static inline unsigned long pages_to_mb(unsigned long npg) +{ + return npg >> (20 - PAGE_SHIFT); +} + +#if CONFIG_PGTABLE_LEVELS > 2 +static inline int pud_none(pud_t pud) +{ + return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; +} + +static inline int pud_present(pud_t pud) +{ + return pud_flags(pud) & _PAGE_PRESENT; +} + +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) + +#define pud_leaf pud_large +static inline int pud_large(pud_t pud) +{ + return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} + +static inline int pud_bad(pud_t pud) +{ + return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; +} +#else +#define pud_leaf pud_large +static inline int pud_large(pud_t pud) +{ + return 0; +} +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ + +#if CONFIG_PGTABLE_LEVELS > 3 +static inline int p4d_none(p4d_t p4d) +{ + return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; +} + +static inline int p4d_present(p4d_t p4d) +{ + return p4d_flags(p4d) & _PAGE_PRESENT; +} + +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) + +static inline int p4d_bad(p4d_t p4d) +{ + unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (p4d_flags(p4d) & ~ignore_flags) != 0; +} +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ + +static inline unsigned long p4d_index(unsigned long address) +{ + return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1); +} + +#if CONFIG_PGTABLE_LEVELS > 4 +static inline int pgd_present(pgd_t pgd) +{ + if (!pgtable_l5_enabled()) + return 1; + return pgd_flags(pgd) & _PAGE_PRESENT; +} + +static inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ + return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) + +/* to find an entry in a page-table-directory. */ +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + if (!pgtable_l5_enabled()) + return (p4d_t *)pgd; + return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); +} + +static inline int pgd_bad(pgd_t pgd) +{ + unsigned long ignore_flags = _PAGE_USER; + + if (!pgtable_l5_enabled()) + return 0; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; +} + +static inline int pgd_none(pgd_t pgd) +{ + if (!pgtable_l5_enabled()) + return 0; + /* + * There is no need to do a workaround for the KNL stray + * A/D bit erratum here. PGDs only point to page tables + * except on 32-bit non-PAE which is not supported on + * KNL. + */ + return !native_pgd_val(pgd); +} +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + +#endif /* __ASSEMBLY__ */ + +#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) + +#ifndef __ASSEMBLY__ + +extern int direct_gbpages; +void init_mem_mapping(void); +void early_alloc_pgt_buf(void); +extern void memblock_find_dma_reserve(void); +void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + +#ifdef CONFIG_X86_64 +extern pgd_t trampoline_pgd_entry; +#endif + +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + +static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) +{ + pmd_t res = *pmdp; + + native_pmd_clear(pmdp); + return res; +} + +static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) +{ + pud_t res = *pudp; + + native_pud_clear(pudp); + return res; +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + page_table_check_pte_set(mm, addr, ptep, pte); + set_pte(ptep, pte); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + page_table_check_pmd_set(mm, addr, pmdp, pmd); + set_pmd(pmdp, pmd); +} + +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(mm, addr, pudp, pud); + native_set_pud(pudp, pud); +} + +/* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. + */ +struct vm_area_struct; + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +extern int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +extern int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t pte = native_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); + return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + int full) +{ + pte_t pte; + if (full) { + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, addr, pte); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); +} + +#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); +extern int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); +extern int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pud_t *pudp); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_RW; +} + +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + + page_table_check_pmd_clear(mm, addr, pmd); + + return pmd; +} + +#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR +static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t pud = native_pudp_get_and_clear(pudp); + + page_table_check_pud_clear(mm, addr, pud); + + return pud; +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); +} + +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; +} + +#ifndef pmdp_establish +#define pmdp_establish pmdp_establish +static inline pmd_t pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) +{ + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); + if (IS_ENABLED(CONFIG_SMP)) { + return xchg(pmdp, pmd); + } else { + pmd_t old = *pmdp; + WRITE_ONCE(*pmdp, pmd); + return old; + } +} +#endif + +#define __HAVE_ARCH_PMDP_INVALIDATE_AD +extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. + */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); +} + +#define pgd_leaf pgd_large +static inline int pgd_large(pgd_t pgd) { return 0; } + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); + * + * dst - pointer to pgd range anywhere on a pgd page + * src - "" + * count - the number of pgds to copy. + * + * dst and src can be on the same page, but the range must not overlap, + * and must not cross a page boundary. + */ +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +{ + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); +#endif +} + +#define PTE_SHIFT ilog2(PTRS_PER_PTE) +static inline int page_level_shift(enum pg_level level) +{ + return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; +} +static inline unsigned long page_level_size(enum pg_level level) +{ + return 1UL << page_level_shift(level); +} +static inline unsigned long page_level_mask(enum pg_level level) +{ + return ~(page_level_size(level) - 1); +} + +/* + * The x86 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd) +{ +} +static inline void update_mmu_cache_pud(struct vm_area_struct *vma, + unsigned long addr, pud_t *pud) +{ +} +#ifdef _PAGE_SWP_EXCLUSIVE +#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); +} +#endif /* _PAGE_SWP_EXCLUSIVE */ + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); +} + +static inline int pte_swp_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} +#endif +#endif + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); +} + +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline u16 pte_flags_pkey(unsigned long pte_flags) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* ifdef to avoid doing 59-bit shift on 32-bit values */ + return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; +#else + return 0; +#endif +} + +static inline bool __pkru_allows_pkey(u16 pkey, bool write) +{ + u32 pkru = read_pkru(); + + if (!__pkru_allows_read(pkru, pkey)) + return false; + if (write && !__pkru_allows_write(pkru, pkey)) + return false; + + return true; +} + +/* + * 'pteval' can come from a PTE, PMD or PUD. We only check + * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the + * same value on all 3 types. + */ +static inline bool __pte_access_permitted(unsigned long pteval, bool write) +{ + unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_RW; + + if ((pteval & need_pte_bits) != need_pte_bits) + return 0; + + return __pkru_allows_pkey(pte_flags_pkey(pteval), write); +} + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + return __pte_access_permitted(pte_val(pte), write); +} + +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return __pte_access_permitted(pmd_val(pmd), write); +} + +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return __pte_access_permitted(pud_val(pud), write); +} + +#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 +extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); + +static inline bool arch_has_pfn_modify_check(void) +{ + return boot_cpu_has_bug(X86_BUG_L1TF); +} + +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) +{ + return true; +} + +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h new file mode 100644 index 000000000..7c9c968a4 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_32_H +#define _ASM_X86_PGTABLE_32_H + +#include + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include +#include +#include + +#include +#include +#include + +struct mm_struct; +struct vm_area_struct; + +extern pgd_t swapper_pg_dir[1024]; +extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; + +void paging_init(void); +void sync_initial_page_table(void); + +#ifdef CONFIG_X86_PAE +# include +#else +# include +#endif + +/* Clear a kernel PTE and flush it from the TLB */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, (vaddr), (ptep)); \ + flush_tlb_one_kernel((vaddr)); \ +} while (0) + +#endif /* !__ASSEMBLY__ */ + +/* + * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM + */ +#ifdef CONFIG_FLATMEM +#define kern_addr_valid(addr) (1) +#else +#define kern_addr_valid(kaddr) (0) +#endif + +/* + * This is used to calculate the .brk reservation for initial pagetables. + * Enough space is reserved to allocate pagetables sufficient to cover all + * of LOWMEM_PAGES, which is an upper bound on the size of the direct map of + * lowmem. + * + * With PAE paging (PTRS_PER_PMD > 1), we allocate PTRS_PER_PGD == 4 pages for + * the PMD's in addition to the pages required for the last level pagetables. + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + +#endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000..b6355416a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h new file mode 100644 index 000000000..5356a46b0 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifdef CONFIG_X86_PAE +# include +# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_MASK (~(PMD_SIZE - 1)) +#else +# include +#endif + +#define pgtable_l5_enabled() 0 + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h new file mode 100644 index 000000000..07cd53eee --- /dev/null +++ b/arch/x86/include/asm/pgtable_64.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_64_H +#define _ASM_X86_PGTABLE_64_H + +#include +#include + +#ifndef __ASSEMBLY__ + +/* + * This file contains the functions and defines necessary to modify and use + * the x86-64 page table tree. + */ +#include +#include +#include +#include + +extern p4d_t level4_kernel_pgt[512]; +extern p4d_t level4_ident_pgt[512]; +extern pud_t level3_kernel_pgt[512]; +extern pud_t level3_ident_pgt[512]; +extern pmd_t level2_kernel_pgt[512]; +extern pmd_t level2_fixmap_pgt[512]; +extern pmd_t level2_ident_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; +extern pgd_t init_top_pgt[]; + +#define swapper_pg_dir init_top_pgt + +extern void paging_init(void); +static inline void sync_initial_page_table(void) { } + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %p(%016lx)\n", \ + __FILE__, __LINE__, &(e), pte_val(e)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016lx)\n", \ + __FILE__, __LINE__, &(e), pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %p(%016lx)\n", \ + __FILE__, __LINE__, &(e), pud_val(e)) + +#if CONFIG_PGTABLE_LEVELS >= 5 +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %p(%016lx)\n", \ + __FILE__, __LINE__, &(e), p4d_val(e)) +#endif + +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016lx)\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +struct mm_struct; + +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} + +void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); + +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + native_set_pte(ptep, native_make_pte(0)); +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + native_set_pmd(pmd, native_make_pmd(0)); +} + +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ +#ifdef CONFIG_SMP + return native_make_pte(xchg(&xp->pte, 0)); +#else + /* native_local_ptep_get_and_clear, + but duplicated because of cyclic dependency */ + pte_t ret = *xp; + native_pte_clear(NULL, 0, xp); + return ret; +#endif +} + +static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) +{ +#ifdef CONFIG_SMP + return native_make_pmd(xchg(&xp->pmd, 0)); +#else + /* native_local_pmdp_get_and_clear, + but duplicated because of cyclic dependency */ + pmd_t ret = *xp; + native_pmd_clear(xp); + return ret; +#endif +} + +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void native_pud_clear(pud_t *pud) +{ + native_set_pud(pud, native_make_pud(0)); +} + +static inline pud_t native_pudp_get_and_clear(pud_t *xp) +{ +#ifdef CONFIG_SMP + return native_make_pud(xchg(&xp->pud, 0)); +#else + /* native_local_pudp_get_and_clear, + * but duplicated because of cyclic dependency + */ + pud_t ret = *xp; + + native_pud_clear(xp); + return ret; +#endif +} + +static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + pgd_t pgd; + + if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + WRITE_ONCE(*p4dp, p4d); + return; + } + + pgd = native_make_pgd(native_p4d_val(p4d)); + pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); +} + +static inline void native_p4d_clear(p4d_t *p4d) +{ + native_set_p4d(p4d, native_make_p4d(0)); +} + +static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); +} + +static inline void native_pgd_clear(pgd_t *pgd) +{ + native_set_pgd(pgd, native_make_pgd(0)); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +/* PGD - Level 4 access */ + +/* PUD - Level 3 access */ + +/* PMD - Level 2 access */ + +/* PTE - Level 1 access */ + +/* + * Encode and de-code a swap entry + * + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry + * + * G (8) is aliased and used as a PROT_NONE indicator for + * !present ptes. We need to start storing swap entries above + * there. We also need to avoid using A and D because of an + * erratum where they can be incorrectly set by hardware on + * non-present PTEs. + * + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * + * SD (1) in swp entry is used to store soft dirty bit, which helps us + * remember soft dirty over page migration + * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * + * E (3) in swp entry is used to rememeber PG_anon_exclusive. + * + * Bit 7 in swp entry should be 0 because pmd_present checks not only P, + * but also L and G. + * + * The offset is inverted by a binary not operation to make the high + * physical bits set. + */ +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +/* Extract the high bits for type */ +#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) + +/* Shift up (to get rid of type), then down to get value */ +#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) + +/* + * Shift the offset up "too far" by TYPE bits, then down again + * The offset is inverted by a binary not operation to make the high + * physical bits set. + */ +#define __swp_entry(type, offset) ((swp_entry_t) { \ + (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) + +extern int kern_addr_valid(unsigned long addr); +extern void cleanup_highmap(void); + +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#define PAGE_AGP PAGE_KERNEL_NOCACHE +#define HAVE_PAGE_AGP 1 + +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) +#define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) + +#define __HAVE_ARCH_PTE_SAME + +#define vmemmap ((struct page *)VMEMMAP_START) + +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); + +#define gup_fast_permitted gup_fast_permitted +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) +{ + if (end >> __VIRTUAL_MASK_SHIFT) + return false; + return true; +} + +#include + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h new file mode 100644 index 000000000..04f36063a --- /dev/null +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_64_DEFS_H +#define _ASM_X86_PGTABLE_64_DEFS_H + +#include + +#ifndef __ASSEMBLY__ +#include +#include + +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long p4dval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef struct { pteval_t pte; } pte_t; + +#ifdef CONFIG_X86_5LEVEL +extern unsigned int __pgtable_l5_enabled; + +#ifdef USE_EARLY_PGTABLE_L5 +/* + * cpu_feature_enabled() is not available in early boot code. + * Use variable instead. + */ +static inline bool pgtable_l5_enabled(void) +{ + return __pgtable_l5_enabled; +} +#else +#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) +#endif /* USE_EARLY_PGTABLE_L5 */ + +#else +#define pgtable_l5_enabled() 0 +#endif /* CONFIG_X86_5LEVEL */ + +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + +#endif /* !__ASSEMBLY__ */ + +#define SHARED_KERNEL_PMD 0 + +#ifdef CONFIG_X86_5LEVEL + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT pgdir_shift +#define PTRS_PER_PGD 512 + +/* + * 4th level page in 5-level paging case + */ +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + +#define MAX_POSSIBLE_PHYSMEM_BITS 52 + +#else /* CONFIG_X86_5LEVEL */ + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 + +#endif /* CONFIG_X86_5LEVEL */ + +/* + * 3rd level page + */ +#define PUD_SHIFT 30 +#define PTRS_PER_PUD 512 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +/* + * See Documentation/x86/x86_64/mm.rst for a description of the memory map. + * + * Be very careful vs. KASLR when changing anything here. The KASLR address + * range must not overlap with anything except the KASAN shadow area, which + * is correct as KASAN disables KASLR. + */ +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) + +#define GUARD_HOLE_PGD_ENTRY -256UL +#define GUARD_HOLE_SIZE (16UL << PGDIR_SHIFT) +#define GUARD_HOLE_BASE_ADDR (GUARD_HOLE_PGD_ENTRY << PGDIR_SHIFT) +#define GUARD_HOLE_END_ADDR (GUARD_HOLE_BASE_ADDR + GUARD_HOLE_SIZE) + +#define LDT_PGD_ENTRY -240UL +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE) + +#define __VMALLOC_BASE_L4 0xffffc90000000000UL +#define __VMALLOC_BASE_L5 0xffa0000000000000UL + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + +#define __VMEMMAP_BASE_L4 0xffffea0000000000UL +#define __VMEMMAP_BASE_L5 0xffd4000000000000UL + +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT +# define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) +# define VMEMMAP_START vmemmap_base +#else +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 +# define VMEMMAP_START __VMEMMAP_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ + +/* + * End of the region for which vmalloc page tables are pre-allocated. + * For non-KMSAN builds, this is the same as VMALLOC_END. + * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than + * VMALLOC_START..VMALLOC_END (see below). + */ +#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) + +#ifndef CONFIG_KMSAN +#define VMALLOC_END VMEMORY_END +#else +/* + * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4 + * are used to keep the metadata for virtual pages. The memory formerly + * belonging to vmalloc area is now laid out as follows: + * + * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area + * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to + * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow + * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to + * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins + * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START + * - shadow for modules, + * KMSAN_MODULES_ORIGIN_START to + * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules. + */ +#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2) +#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1) + +/* + * vmalloc metadata addresses are calculated by adding shadow/origin offsets + * to vmalloc address. + */ +#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE +#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1) + +#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET) +#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET) + +/* + * The shadow/origin for modules are placed one by one in the last 1/4 of + * vmalloc space. + */ +#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1) +#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN) +#endif /* CONFIG_KMSAN */ + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) +/* The module sections ends with the start of the fixmap */ +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define MODULES_END _AC(0xffffffffff000000, UL) +#else +# define MODULES_END _AC(0xfffffffffe000000, UL) +#endif +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define CPU_ENTRY_AREA_PGD _AC(-4, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) + +#define EARLY_DYNAMIC_PAGE_TABLES 64 + +#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) + +/* + * We borrow bit 3 to remember PG_anon_exclusive. + */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT + +#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000..d34cce1b9 --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h new file mode 100644 index 000000000..f6116b66f --- /dev/null +++ b/arch/x86/include/asm/pgtable_types.h @@ -0,0 +1,556 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PGTABLE_DEFS_H +#define _ASM_X86_PGTABLE_DEFS_H + +#include +#include + +#include + +#define _PAGE_BIT_PRESENT 0 /* is present */ +#define _PAGE_BIT_RW 1 /* writeable */ +#define _PAGE_BIT_USER 2 /* userspace addressable */ +#define _PAGE_BIT_PWT 3 /* page write through */ +#define _PAGE_BIT_PCD 4 /* page cache disabled */ +#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ +#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +#define _PAGE_BIT_PAT 7 /* on 4KB pages */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_SOFTW1 9 /* available for programmer */ +#define _PAGE_BIT_SOFTW2 10 /* " */ +#define _PAGE_BIT_SOFTW3 11 /* " */ +#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */ +#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */ +#define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */ +#define _PAGE_BIT_PKEY_BIT3 62 /* Protection Keys, bit 4/4 */ +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + +#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ +#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ +#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 + +/* If _PAGE_BIT_PRESENT is clear, we use these: */ +/* - if the user mapped it with PROT_NONE; pte_present gives true */ +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL + +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) +#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) +#define _PAGE_SOFTW3 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) +#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) +#define _PAGE_PKEY_BIT2 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT2) +#define _PAGE_PKEY_BIT3 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT3) +#else +#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) +#define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) +#endif + +#define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ + _PAGE_PKEY_BIT1 | \ + _PAGE_PKEY_BIT2 | \ + _PAGE_PKEY_BIT3) + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) +#else +#define _PAGE_KNL_ERRATUM_MASK 0 +#endif + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) +#else +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + +/* + * Tracking soft dirty bit when a page goes to a swap is tricky. + * We need a bit which can be stored in pte _and_ not conflict + * with swap entry format. On x86 bits 1-4 are *not* involved + * into swap entry computation, but bit 7 is used for thp migration, + * so we borrow bit 1 for soft dirty tracking. + * + * Please note that this bit must be treated as swap dirty page + * mark if and only if the PTE/PMD has present bit clear! + */ +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW +#else +#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) +#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) +#else +#define _PAGE_NX (_AT(pteval_t, 0)) +#define _PAGE_DEVMAP (_AT(pteval_t, 0)) +#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) +#endif + +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) + +/* + * Set of bits not changed in pte_modify. The pte's + * protection key is treated like _PAGE_RW, for + * instance, and is *not* included in this mask since + * pte_modify() does modify it. + */ +#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\ + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_UFFD_WP) +#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) +#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) + +/* + * The cache modes defined here are used to translate between pure SW usage + * and the HW defined cache mode bits and/or PAT entries. + * + * The resulting bits for PWT, PCD and PAT should be chosen in a way + * to have the WB mode at index 0 (all bits clear). This is the default + * right now and likely would break too much if changed. + */ +#ifndef __ASSEMBLY__ +enum page_cache_mode { + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_UC_MINUS = 2, + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 +}; +#endif + +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) + +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) + +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) + +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE + + +#ifndef __ASSEMBLY__ + +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) + +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) + +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) + +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) + +#endif /* __ASSEMBLY__ */ + +/* + * early identity mapping pte attrib macros. + */ +#ifdef CONFIG_X86_64 +#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +#else +#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ +#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +#endif + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#ifndef __ASSEMBLY__ + +#include + +/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* + * Extracts the flags from a (pte|pmd|pud|pgd)val_t + * This includes the protection key value. + */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) + +typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; + +typedef struct { pgdval_t pgd; } pgd_t; + +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + +#ifdef CONFIG_X86_PAE + +/* + * PHYSICAL_PAGE_MASK might be non-constant when SME is compiled in, so we can't + * use it here. + */ + +#define PGD_PAE_PAGE_MASK ((signed long)PAGE_MASK) +#define PGD_PAE_PHYS_MASK (((1ULL << __PHYSICAL_MASK_SHIFT)-1) & PGD_PAE_PAGE_MASK) + +/* + * PAE allows Base Address, P, PWT, PCD and AVL bits to be set in PGD entries. + * All other bits are Reserved MBZ + */ +#define PGD_ALLOWED_BITS (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \ + _PAGE_PWT | _PAGE_PCD | \ + _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3) + +#else +/* No need to mask any bits for !PAE */ +#define PGD_ALLOWED_BITS (~0ULL) +#endif + +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val & PGD_ALLOWED_BITS }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd & PGD_ALLOWED_BITS; +} + +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + +#if CONFIG_PGTABLE_LEVELS > 4 +typedef struct { p4dval_t p4d; } p4d_t; + +static inline p4d_t native_make_p4d(pudval_t val) +{ + return (p4d_t) { val }; +} + +static inline p4dval_t native_p4d_val(p4d_t p4d) +{ + return p4d.p4d; +} +#else +#include + +static inline p4d_t native_make_p4d(pudval_t val) +{ + return (p4d_t) { .pgd = native_make_pgd((pgdval_t)val) }; +} + +static inline p4dval_t native_p4d_val(p4d_t p4d) +{ + return native_pgd_val(p4d.pgd); +} +#endif + +#if CONFIG_PGTABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} +#else +#include + +static inline pud_t native_make_pud(pudval_t val) +{ + return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.p4d.pgd); +} +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 +typedef struct { pmdval_t pmd; } pmd_t; + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} +#else +#include + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.p4d.pgd); +} +#endif + +static inline p4dval_t p4d_pfn_mask(p4d_t p4d) +{ + /* No 512 GiB huge pages yet */ + return PTE_PFN_MASK; +} + +static inline p4dval_t p4d_flags_mask(p4d_t p4d) +{ + return ~p4d_pfn_mask(p4d); +} + +static inline p4dval_t p4d_flags(p4d_t p4d) +{ + return native_p4d_val(p4d) & p4d_flags_mask(p4d); +} + +static inline pudval_t pud_pfn_mask(pud_t pud) +{ + if (native_pud_val(pud) & _PAGE_PSE) + return PHYSICAL_PUD_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pudval_t pud_flags_mask(pud_t pud) +{ + return ~pud_pfn_mask(pud); +} + +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & pud_flags_mask(pud); +} + +static inline pmdval_t pmd_pfn_mask(pmd_t pmd) +{ + if (native_pmd_val(pmd) & _PAGE_PSE) + return PHYSICAL_PMD_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pmdval_t pmd_flags_mask(pmd_t pmd) +{ + return ~pmd_pfn_mask(pmd); +} + +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & pmd_flags_mask(pmd); +} + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline pteval_t pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + +#define __pte2cm_idx(cb) \ + ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ + (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ + (((cb) >> _PAGE_BIT_PWT) & 1)) +#define __cm_idx2pte(i) \ + ((((i) & 4) << (_PAGE_BIT_PAT - 2)) | \ + (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ + (((i) & 1) << _PAGE_BIT_PWT)) + +unsigned long cachemode2protval(enum page_cache_mode pcm); + +static inline pgprotval_t protval_4k_2_large(pgprotval_t val) +{ + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); +} +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) +{ + return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); +} +static inline pgprotval_t protval_large_2_4k(pgprotval_t val) +{ + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); +} +static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) +{ + return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); +} + + +typedef struct page *pgtable_t; + +extern pteval_t __supported_pte_mask; +extern pteval_t __default_kernel_pte_mask; +extern void set_nx(void); +extern int nx_enabled; + +#define pgprot_writecombine pgprot_writecombine +extern pgprot_t pgprot_writecombine(pgprot_t prot); + +#define pgprot_writethrough pgprot_writethrough +extern pgprot_t pgprot_writethrough(pgprot_t prot); + +/* Indicate that x86 has its own track and untrack pfn vma functions */ +#define __HAVE_PFNMAP_TRACKING + +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); + +/* Install a pte for a particular vaddr in kernel space. */ +void set_pte_vaddr(unsigned long vaddr, pte_t pte); + +#ifdef CONFIG_X86_32 +extern void native_pagetable_init(void); +#else +#define native_pagetable_init paging_init +#endif + +struct seq_file; +extern void arch_report_meminfo(struct seq_file *m); + +enum pg_level { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_512G, + PG_LEVEL_NUM +}; + +#ifdef CONFIG_PROC_FS +extern void update_page_count(int level, unsigned long pages); +#else +static inline void update_page_count(int level, unsigned long pages) { } +#endif + +/* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. + */ +extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level); +extern pmd_t *lookup_pmd_address(unsigned long address); +extern phys_addr_t slow_virt_to_phys(void *__address); +extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, + unsigned long address, + unsigned numpages, + unsigned long page_flags); +extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, + unsigned long numpages); +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h new file mode 100644 index 000000000..2e6c04d8a --- /dev/null +++ b/arch/x86/include/asm/pkeys.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKEYS_H +#define _ASM_X86_PKEYS_H + +/* + * If more than 16 keys are ever supported, a thorough audit + * will be necessary to ensure that the types that store key + * numbers and masks have sufficient capacity. + */ +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) + +extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + +static inline bool arch_pkeys_enabled(void) +{ + return cpu_feature_enabled(X86_FEATURE_OSPKE); +} + +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +extern int __execute_only_pkey(struct mm_struct *mm); +static inline int execute_only_pkey(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return ARCH_DEFAULT_PKEY; + + return __execute_only_pkey(mm); +} + +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return 0; + + return __arch_override_mprotect_pkey(vma, prot, pkey); +} + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3) + +#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) +#define mm_set_pkey_allocated(mm, pkey) do { \ + mm_pkey_allocation_map(mm) |= (1U << pkey); \ +} while (0) +#define mm_set_pkey_free(mm, pkey) do { \ + mm_pkey_allocation_map(mm) &= ~(1U << pkey); \ +} while (0) + +static inline +bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. + */ + if (pkey < 0) + return false; + if (pkey >= arch_max_pkey()) + return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + + return mm_pkey_allocation_map(mm) & (1U << pkey); +} + +/* + * Returns a positive, 4-bit key on success, or -1 on failure. + */ +static inline +int mm_pkey_alloc(struct mm_struct *mm) +{ + /* + * Note: this is the one and only place we make sure + * that the pkey is valid as far as the hardware is + * concerned. The rest of the kernel trusts that + * only good, valid pkeys come out of here. + */ + u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1); + int ret; + + /* + * Are we out of pkeys? We must handle this specially + * because ffz() behavior is undefined if there are no + * zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz(mm_pkey_allocation_map(mm)); + + mm_set_pkey_allocated(mm, ret); + + return ret; +} + +static inline +int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + mm_set_pkey_free(mm, pkey); + + return 0; +} + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | + VM_PKEY_BIT2 | VM_PKEY_BIT3; + + return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; +} + +#endif /*_ASM_X86_PKEYS_H */ diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000..74f0a2d34 --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include + +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u +#define PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) +#else +#define init_pkru_value 0 +#define pkru_get_init_value() 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru != rdpkru()) + wrpkru(pkru); +} + +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + +#endif diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h new file mode 100644 index 000000000..40f922705 --- /dev/null +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * platform_sst_audio.h: sst audio platform data header file + * + * Copyright (C) 2012-14 Intel Corporation + * Author: Jeeja KP + * Omair Mohammed Abdullah + * Vinod Koul ,vinod.koul@intel.com> + */ +#ifndef _PLATFORM_SST_AUDIO_H_ +#define _PLATFORM_SST_AUDIO_H_ + +#define MAX_NUM_STREAMS_MRFLD 25 +#define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD + +enum sst_audio_task_id_mrfld { + SST_TASK_ID_NONE = 0, + SST_TASK_ID_SBA = 1, + SST_TASK_ID_MEDIA = 3, + SST_TASK_ID_MAX = SST_TASK_ID_MEDIA, +}; + +/* Device IDs for Merrifield are Pipe IDs, + * ref: DSP spec v0.75 */ +enum sst_audio_device_id_mrfld { + /* Output pipeline IDs */ + PIPE_ID_OUT_START = 0x0, + PIPE_CODEC_OUT0 = 0x2, + PIPE_CODEC_OUT1 = 0x3, + PIPE_SPROT_LOOP_OUT = 0x4, + PIPE_MEDIA_LOOP1_OUT = 0x5, + PIPE_MEDIA_LOOP2_OUT = 0x6, + PIPE_VOIP_OUT = 0xC, + PIPE_PCM0_OUT = 0xD, + PIPE_PCM1_OUT = 0xE, + PIPE_PCM2_OUT = 0xF, + PIPE_MEDIA0_OUT = 0x12, + PIPE_MEDIA1_OUT = 0x13, +/* Input Pipeline IDs */ + PIPE_ID_IN_START = 0x80, + PIPE_CODEC_IN0 = 0x82, + PIPE_CODEC_IN1 = 0x83, + PIPE_SPROT_LOOP_IN = 0x84, + PIPE_MEDIA_LOOP1_IN = 0x85, + PIPE_MEDIA_LOOP2_IN = 0x86, + PIPE_VOIP_IN = 0x8C, + PIPE_PCM0_IN = 0x8D, + PIPE_PCM1_IN = 0x8E, + PIPE_MEDIA0_IN = 0x8F, + PIPE_MEDIA1_IN = 0x90, + PIPE_MEDIA2_IN = 0x91, + PIPE_MEDIA3_IN = 0x9C, + PIPE_RSVD = 0xFF, +}; + +/* The stream map for each platform consists of an array of the below + * stream map structure. + */ +struct sst_dev_stream_map { + u8 dev_num; /* device id */ + u8 subdev_num; /* substream */ + u8 direction; + u8 device_id; /* fw id */ + u8 task_id; /* fw task */ + u8 status; +}; + +struct sst_platform_data { + /* Intel software platform id*/ + struct sst_dev_stream_map *pdev_strm_map; + unsigned int strm_map_size; +}; + +struct sst_info { + u32 iram_start; + u32 iram_end; + bool iram_use; + u32 dram_start; + u32 dram_end; + bool dram_use; + u32 imr_start; + u32 imr_end; + bool imr_use; + u32 mailbox_start; + bool use_elf; + bool lpe_viewpt_rqd; + unsigned int max_streams; + u32 dma_max_len; + u8 num_probes; +}; + +struct sst_lib_dnld_info { + unsigned int mod_base; + unsigned int mod_end; + unsigned int mod_table_offset; + unsigned int mod_table_size; + bool mod_ddr_dnld; +}; + +struct sst_res_info { + unsigned int shim_offset; + unsigned int shim_size; + unsigned int shim_phy_addr; + unsigned int ssp0_offset; + unsigned int ssp0_size; + unsigned int dma0_offset; + unsigned int dma0_size; + unsigned int dma1_offset; + unsigned int dma1_size; + unsigned int iram_offset; + unsigned int iram_size; + unsigned int dram_offset; + unsigned int dram_size; + unsigned int mbox_offset; + unsigned int mbox_size; + unsigned int acpi_lpe_res_index; + unsigned int acpi_ddr_index; + unsigned int acpi_ipc_irq_index; +}; + +struct sst_ipc_info { + int ipc_offset; + unsigned int mbox_recv_off; +}; + +struct sst_platform_info { + const struct sst_info *probe_data; + const struct sst_ipc_info *ipc_info; + const struct sst_res_info *res_info; + const struct sst_lib_dnld_info *lib_info; + const char *platform; + bool streams_lost_on_suspend; +}; +int add_sst_platform_device(void); +#endif + diff --git a/arch/x86/include/asm/pm-trace.h b/arch/x86/include/asm/pm-trace.h new file mode 100644 index 000000000..bfa32aa42 --- /dev/null +++ b/arch/x86/include/asm/pm-trace.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PM_TRACE_H +#define _ASM_X86_PM_TRACE_H + +#include + +#define TRACE_RESUME(user) \ +do { \ + if (pm_trace_enabled) { \ + const void *tracedata; \ + asm volatile(_ASM_MOV " $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n\t" \ + _ASM_PTR " %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_pm_trace(tracedata, user); \ + } \ +} while (0) + +#define TRACE_SUSPEND(user) TRACE_RESUME(user) + +#endif /* _ASM_X86_PM_TRACE_H */ diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h new file mode 100644 index 000000000..374336e21 --- /dev/null +++ b/arch/x86/include/asm/posix_types.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +# ifdef CONFIG_X86_32 +# include +# else +# include +# endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 000000000..5f6daea1e --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include +#include +#include +#include + +DECLARE_PER_CPU(int, __preempt_count); + +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ +#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ + int old, new; + + do { + old = raw_cpu_read_4(__preempt_count); + new = (old & PREEMPT_NEED_RESCHED) | + (pc & ~PREEMPT_NEED_RESCHED); + } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); +} + +/* + * must be macros to avoid header recursion hell + */ +#define init_task_preempt_count(p) do { } while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ + raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + raw_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ + raw_cpu_add_4(__preempt_count, -val); +} + +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ +static __always_inline bool __preempt_count_dec_and_test(void) +{ + return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(int preempt_offset) +{ + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); +} + +#ifdef CONFIG_PREEMPTION + +extern asmlinkage void preempt_schedule(void); +extern asmlinkage void preempt_schedule_thunk(void); + +#define preempt_schedule_dynamic_enabled preempt_schedule_thunk +#define preempt_schedule_dynamic_disabled NULL + +extern asmlinkage void preempt_schedule_notrace(void); +extern asmlinkage void preempt_schedule_notrace_thunk(void); + +#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk +#define preempt_schedule_notrace_dynamic_disabled NULL + +#ifdef CONFIG_PREEMPT_DYNAMIC + +DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); + +#define __preempt_schedule() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ +} while (0) + +DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); + +#define __preempt_schedule_notrace() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ +} while (0) + +#else /* PREEMPT_DYNAMIC */ + +#define __preempt_schedule() \ + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT); + +#define __preempt_schedule_notrace() \ + asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT); + +#endif /* PREEMPT_DYNAMIC */ + +#endif /* PREEMPTION */ + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h new file mode 100644 index 000000000..1c7f3815b --- /dev/null +++ b/arch/x86/include/asm/probe_roms.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PROBE_ROMS_H_ +#define _PROBE_ROMS_H_ +struct pci_dev; + +extern void __iomem *pci_map_biosrom(struct pci_dev *pdev); +extern void pci_unmap_biosrom(void __iomem *rom); +extern size_t pci_biosrom_size(struct pci_dev *pdev); +#endif diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h new file mode 100644 index 000000000..efe3e46e4 --- /dev/null +++ b/arch/x86/include/asm/processor-cyrix.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NSC/Cyrix CPU indexed register access. Must be inlined instead of + * macros to ensure correct access ordering + * Access order is always 0x22 (=offset), 0x23 (=value) + */ + +#include + +static inline u8 getCx86(u8 reg) +{ + return pc_conf_get(reg); +} + +static inline void setCx86(u8 reg, u8 data) +{ + pc_conf_set(reg, data); +} diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h new file mode 100644 index 000000000..02c2cbda4 --- /dev/null +++ b/arch/x86/include/asm/processor-flags.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PROCESSOR_FLAGS_H +#define _ASM_X86_PROCESSOR_FLAGS_H + +#include +#include + +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + +/* + * CR3's layout varies depending on several things. + * + * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID. + * If PAE is enabled, then CR3[11:5] is part of the PDPT address + * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored. + * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and + * CR3[2:0] and CR3[11:5] are ignored. + * + * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD. + * + * CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be + * written as 1 to prevent the write to CR3 from flushing the TLB. + * + * On systems with SME, one bit (in a variable position!) is stolen to indicate + * that the top-level paging structure is encrypted. + * + * All of the remaining bits indicate the physical address of the top-level + * paging structure. + * + * CR3_ADDR_MASK is the mask used by read_cr3_pa(). + */ +#ifdef CONFIG_X86_64 +/* Mask off the address space ID and SME encryption bits. */ +#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) +#define CR3_PCID_MASK 0xFFFull +#define CR3_NOFLUSH BIT_ULL(63) + +#else +/* + * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save + * a tiny bit of code size by setting all the bits. + */ +#define CR3_ADDR_MASK 0xFFFFFFFFull +#define CR3_PCID_MASK 0ull +#define CR3_NOFLUSH 0 +#endif + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_PCID_USER_BIT 11 +#endif + +#endif /* _ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h new file mode 100644 index 000000000..94ea13adb --- /dev/null +++ b/arch/x86/include/asm/processor.h @@ -0,0 +1,872 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PROCESSOR_H +#define _ASM_X86_PROCESSOR_H + +#include + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; +struct io_bitmap; +struct vm86; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * We handle most unaligned accesses in hardware. On the other hand + * unaligned DMA can be quite expensive on some Nehalem processors. + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN 0 + +#define HBP_NUM 4 + +/* + * These alignment constraints are for performance in the vSMP case, + * but in the task_struct case we must also meet hardware imposed + * alignment requirements of the FPU state: + */ +#ifdef CONFIG_X86_VSMP +# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) +# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) +#else +# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) +# define ARCH_MIN_MMSTRUCT_ALIGN 0 +#endif + +enum tlb_infos { + ENTRIES, + NR_INFO +}; + +extern u16 __read_mostly tlb_lli_4k[NR_INFO]; +extern u16 __read_mostly tlb_lli_2m[NR_INFO]; +extern u16 __read_mostly tlb_lli_4m[NR_INFO]; +extern u16 __read_mostly tlb_lld_4k[NR_INFO]; +extern u16 __read_mostly tlb_lld_2m[NR_INFO]; +extern u16 __read_mostly tlb_lld_4m[NR_INFO]; +extern u16 __read_mostly tlb_lld_1g[NR_INFO]; + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head_32.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_stepping; +#ifdef CONFIG_X86_64 + /* Number of 4K pages in DTLB/ITLB combined(in pages): */ + int x86_tlbsize; +#endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif + __u8 x86_virt_bits; + __u8 x86_phys_bits; + /* CPUID returned core id bits: */ + __u8 x86_coreid_bits; + __u8 cu_id; + /* Max extended CPUID function supported: */ + __u32 extended_cpuid_level; + /* Maximum supported CPUID level, -1=no CPUID: */ + int cpuid_level; + /* + * Align to size of unsigned long because the x86_capability array + * is passed to bitops which require the alignment. Use unnamed + * union to enforce the array is aligned to size of unsigned long. + */ + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; + char x86_vendor_id[16]; + char x86_model_id[64]; + /* in KB - valid for CPUS which support this call: */ + unsigned int x86_cache_size; + int x86_cache_alignment; /* In bytes */ + /* Cache QoS architectural values, valid only on the BSP: */ + int x86_cache_max_rmid; /* max index */ + int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; + int x86_power; + unsigned long loops_per_jiffy; + /* protected processor identification number */ + u64 ppin; + /* cpuid returned max cores value: */ + u16 x86_max_cores; + u16 apicid; + u16 initial_apicid; + u16 x86_clflush_size; + /* number of cores as seen by the OS: */ + u16 booted_cores; + /* Physical processor id: */ + u16 phys_proc_id; + /* Logical processor id: */ + u16 logical_proc_id; + /* Core id: */ + u16 cpu_core_id; + u16 cpu_die_id; + u16 logical_die_id; + /* Index into per_cpu list: */ + u16 cpu_index; + /* Is SMT active on this core? */ + bool smt_active; + u32 microcode; + /* Address space bits used by the cache internally */ + u8 x86_cache_bits; + unsigned initialized : 1; +} __randomize_layout; + +struct cpuid_regs { + u32 eax, ebx, ecx, edx; +}; + +enum cpuid_regs_idx { + CPUID_EAX = 0, + CPUID_EBX, + CPUID_ECX, + CPUID_EDX, +}; + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_HYGON 9 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_VORTEX 11 +#define X86_VENDOR_NUM 12 + +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ +extern struct cpuinfo_x86 boot_cpu_data; +extern struct cpuinfo_x86 new_cpu_data; + +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; + +#ifdef CONFIG_SMP +DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#else +#define cpu_info boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#endif + +extern const struct seq_operations cpuinfo_op; + +#define cache_line_size() (boot_cpu_data.x86_cache_alignment) + +extern void cpu_detect(struct cpuinfo_x86 *c); + +static inline unsigned long long l1tf_pfn_limit(void) +{ + return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); +} + +extern void early_cpu_init(void); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +void print_cpu_msr(struct cpuinfo_x86 *); + +#ifdef CONFIG_X86_32 +extern int have_cpuid_p(void); +#else +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +#define native_cpuid_reg(reg) \ +static inline unsigned int native_cpuid_##reg(unsigned int op) \ +{ \ + unsigned int eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum. + */ +native_cpuid_reg(eax) +native_cpuid_reg(ebx) +native_cpuid_reg(ecx) +native_cpuid_reg(edx) + +/* + * Friendlier CR3 helpers. + */ +static inline unsigned long read_cr3_pa(void) +{ + return __read_cr3() & CR3_ADDR_MASK; +} + +static inline unsigned long native_read_cr3_pa(void) +{ + return __native_read_cr3() & CR3_ADDR_MASK; +} + +static inline void load_cr3(pgd_t *pgdir) +{ + write_cr3(__sme_pa(pgdir)); +} + +/* + * Note that while the legacy 'TSS' name comes from 'Task State Segment', + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, + * unrelated to the task-switch mechanism: + */ +#ifdef CONFIG_X86_32 +/* This is the TSS defined by the hardware. */ +struct x86_hw_tss { + unsigned short back_link, __blh; + unsigned long sp0; + unsigned short ss0, __ss0h; + unsigned long sp1; + + /* + * We don't use ring 1, so ss1 is a convenient scratch space in + * the same cacheline as sp0. We use ss1 to cache the value in + * MSR_IA32_SYSENTER_CS. When we context switch + * MSR_IA32_SYSENTER_CS, we first check if the new value being + * written matches ss1, and, if it's not, then we wrmsr the new + * value and update ss1. + * + * The only reason we context switch MSR_IA32_SYSENTER_CS is + * that we set it to zero in vm86 tasks to avoid corrupting the + * stack if we were to go through the sysenter path from vm86 + * mode. + */ + unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ + + unsigned short __ss1h; + unsigned long sp2; + unsigned short ss2, __ss2h; + unsigned long __cr3; + unsigned long ip; + unsigned long flags; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long bx; + unsigned long sp; + unsigned long bp; + unsigned long si; + unsigned long di; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace; + unsigned short io_bitmap_base; + +} __attribute__((packed)); +#else +struct x86_hw_tss { + u32 reserved1; + u64 sp0; + u64 sp1; + + /* + * Since Linux does not use ring 2, the 'sp2' slot is unused by + * hardware. entry_SYSCALL_64 uses it as scratch space to stash + * the user RSP value. + */ + u64 sp2; + + u64 reserved2; + u64 ist[7]; + u32 reserved3; + u32 reserved4; + u16 reserved5; + u16 io_bitmap_base; + +} __attribute__((packed)); +#endif + +/* + * IO-bitmap sizes: + */ +#define IO_BITMAP_BITS 65536 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. + */ +# define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) + +struct entry_stack { + char stack[PAGE_SIZE]; +}; + +struct entry_stack_page { + struct entry_stack stack; +} __aligned(PAGE_SIZE); + +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + + /* + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. + */ + unsigned int prev_max; + + /* + * The extra 1 is there because the CPU will access an + * additional byte beyond the end of the IO permission + * bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ + unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); + +/* Per CPU interrupt stacks */ +struct irq_stack { + char stack[IRQ_STACK_SIZE]; +} __aligned(IRQ_STACK_SIZE); + +DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); + +#ifdef CONFIG_X86_64 +struct fixed_percpu_data { + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + * + * Once we are willing to require -mstack-protector-guard-symbol= + * support for x86_64 stackprotector, we can get rid of this. + */ + char gs_base[40]; + unsigned long stack_canary; +}; + +DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; +DECLARE_INIT_PER_CPU(fixed_percpu_data); + +static inline unsigned long cpu_kernelmode_gs_base(int cpu) +{ + return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); +} + +DECLARE_PER_CPU(void *, hardirq_stack_ptr); +DECLARE_PER_CPU(bool, hardirq_stack_inuse); +extern asmlinkage void ignore_sysret(void); + +/* Save actual FS/GS selectors and bases to current->thread */ +void current_save_fsgs(void); +#else /* X86_64 */ +#ifdef CONFIG_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, __stack_chk_guard); +#endif +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); +#endif /* !X86_64 */ + +struct perf_event; + +struct thread_struct { + /* Cached TLS descriptors: */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; +#ifdef CONFIG_X86_32 + unsigned long sp0; +#endif + unsigned long sp; +#ifdef CONFIG_X86_32 + unsigned long sysenter_cs; +#else + unsigned short es; + unsigned short ds; + unsigned short fsindex; + unsigned short gsindex; +#endif + +#ifdef CONFIG_X86_64 + unsigned long fsbase; + unsigned long gsbase; +#else + /* + * XXX: this could presumably be unsigned short. Alternatively, + * 32-bit kernels could be taught to use fsindex instead. + */ + unsigned long fs; + unsigned long gs; +#endif + + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long virtual_dr6; + /* Keep track of the exact dr7 value set by the user */ + unsigned long ptrace_dr7; + /* Fault info: */ + unsigned long cr2; + unsigned long trap_nr; + unsigned long error_code; +#ifdef CONFIG_VM86 + /* Virtual 86 mode info */ + struct vm86 *vm86; +#endif + /* IO permissions: */ + struct io_bitmap *io_bitmap; + + /* + * IOPL. Privilege level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. + */ + unsigned long iopl_emul; + + unsigned int iopl_warn:1; + unsigned int sig_on_uaccess_err:1; + + /* + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the tasks's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. + */ + u32 pkru; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* + * WARNING: 'fpu' is dynamically-sized. It *MUST* be at + * the end. + */ +}; + +extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); + +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + fpu_thread_struct_whitelist(offset, size); +} + +static inline void +native_load_sp0(unsigned long sp0) +{ + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); +} + +static __always_inline void native_swapgs(void) +{ +#ifdef CONFIG_X86_64 + asm volatile("swapgs" ::: "memory"); +#endif +} + +static __always_inline unsigned long current_top_of_stack(void) +{ + /* + * We can't read directly from tss.sp0: sp0 on x86_32 is special in + * and around vm86 mode and sp0 on x86_64 is special because of the + * entry trampoline. + */ + return this_cpu_read_stable(cpu_current_top_of_stack); +} + +static __always_inline bool on_thread_stack(void) +{ + return (unsigned long)(current_top_of_stack() - + current_stack_pointer) < THREAD_SIZE; +} + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else +#define __cpuid native_cpuid + +static inline void load_sp0(unsigned long sp0) +{ + native_load_sp0(sp0); +} + +#endif /* CONFIG_PARAVIRT_XXL */ + +unsigned long __get_wchan(struct task_struct *p); + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void amd_e400_c1e_apic_setup(void); + +extern unsigned long boot_option_idle_override; + +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL}; + +extern void enable_sep_cpu(void); +extern int sysenter_setup(void); + + +/* Defined in head.S */ +extern struct desc_ptr early_gdt_descr; + +extern void switch_to_new_gdt(int); +extern void load_direct_gdt(int); +extern void load_fixmap_gdt(int); +extern void load_percpu_segment(int); +extern void cpu_init(void); +extern void cpu_init_secondary(void); +extern void cpu_init_exception_handling(void); +extern void cr4_init(void); + +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + +static inline void update_debugctlmsr(unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); +} + +extern void set_task_blockstep(struct task_struct *task, bool on); + +/* Boot loader type from the setup header: */ +extern int bootloader_type; +extern int bootloader_version; + +extern char ignore_fpu_irq; + +#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifdef CONFIG_X86_32 +# define BASE_PREFETCH "" +# define ARCH_HAS_PREFETCH +#else +# define BASE_PREFETCH "prefetcht0 %P1" +#endif + +/* + * Prefetch instructions for Pentium III (+) and AMD Athlon (+) + * + * It's not worth to care about 3dnow prefetches for the K6 + * because they are microcoded there and very slow. + */ +static inline void prefetch(const void *x) +{ + alternative_input(BASE_PREFETCH, "prefetchnta %P1", + X86_FEATURE_XMM, + "m" (*(const char *)x)); +} + +/* + * 3dnow prefetch to get an exclusive cache line. + * Useful for spinlocks to avoid one state transition in the + * cache coherency protocol: + */ +static __always_inline void prefetchw(const void *x) +{ + alternative_input(BASE_PREFETCH, "prefetchw %P1", + X86_FEATURE_3DNOWPREFETCH, + "m" (*(const char *)x)); +} + +static inline void spin_lock_prefetch(const void *x) +{ + prefetchw(x); +} + +#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ + TOP_OF_KERNEL_STACK_PADDING) + +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) + +#define task_pt_regs(task) \ +({ \ + unsigned long __ptr = (unsigned long)task_stack_page(task); \ + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ + ((struct pt_regs *)__ptr) - 1; \ +}) + +#ifdef CONFIG_X86_32 +#define INIT_THREAD { \ + .sp0 = TOP_OF_INIT_STACK, \ + .sysenter_cs = __KERNEL_CS, \ +} + +#define KSTK_ESP(task) (task_pt_regs(task)->sp) + +#else +#define INIT_THREAD { } + +extern unsigned long KSTK_ESP(struct task_struct *task); + +#endif /* CONFIG_X86_64 */ + +extern void start_thread(struct pt_regs *regs, unsigned long new_ip, + unsigned long new_sp); + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) +#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) + +#define KSTK_EIP(task) (task_pt_regs(task)->ip) + +/* Get/set a process' ability to use the timestamp counter instruction */ +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) + +extern int get_tsc_mode(unsigned long adr); +extern int set_tsc_mode(unsigned int val); + +DECLARE_PER_CPU(u64, msr_misc_features_shadow); + +extern u16 get_llc_id(unsigned int cpu); + +#ifdef CONFIG_CPU_SUP_AMD +extern u32 amd_get_nodes_per_socket(void); +extern u32 amd_get_highest_perf(void); +extern bool cpu_has_ibpb_brtype_microcode(void); +extern void amd_clear_divider(void); +#else +static inline u32 amd_get_nodes_per_socket(void) { return 0; } +static inline u32 amd_get_highest_perf(void) { return 0; } +static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } +static inline void amd_clear_divider(void) { } +#endif + +#define for_each_possible_hypervisor_cpuid_base(function) \ + for (function = 0x40000000; function < 0x40010000; function += 0x100) + +static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) +{ + uint32_t base, eax, signature[3]; + + for_each_possible_hypervisor_cpuid_base(base) { + cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); + + if (!memcmp(sig, signature, 12) && + (leaves == 0 || ((eax - base) >= leaves))) + return base; + } + + return 0; +} + +extern unsigned long arch_align_stack(unsigned long sp); +void free_init_pages(const char *what, unsigned long begin, unsigned long end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end); + +void default_idle(void); +#ifdef CONFIG_XEN +bool xen_set_default_idle(void); +#else +#define xen_set_default_idle 0 +#endif + +void __noreturn stop_this_cpu(void *dummy); +void microcode_check(struct cpuinfo_x86 *prev_info); +void store_cpu_caps(struct cpuinfo_x86 *info); + +enum l1tf_mitigations { + L1TF_MITIGATION_OFF, + L1TF_MITIGATION_FLUSH_NOWARN, + L1TF_MITIGATION_FLUSH, + L1TF_MITIGATION_FLUSH_NOSMT, + L1TF_MITIGATION_FULL, + L1TF_MITIGATION_FULL_FORCE +}; + +extern enum l1tf_mitigations l1tf_mitigation; + +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + +extern bool gds_ucode_mitigated(void); + +#endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h new file mode 100644 index 000000000..b716d291d --- /dev/null +++ b/arch/x86/include/asm/prom.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Definitions for Device tree / OpenFirmware handling on X86 + * + * based on arch/powerpc/include/asm/prom.h which is + * Copyright (C) 1996-2005 Paul Mackerras. + */ + +#ifndef _ASM_X86_PROM_H +#define _ASM_X86_PROM_H +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_OF +extern int of_ioapic; +extern u64 initial_dtb; +extern void add_dtb(u64 data); +void x86_of_pci_init(void); +void x86_dtb_init(void); +#else +static inline void add_dtb(u64 data) { } +static inline void x86_of_pci_init(void) { } +static inline void x86_dtb_init(void) { } +#define of_ioapic 0 +#endif + +extern char cmd_line[COMMAND_LINE_SIZE]; + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h new file mode 100644 index 000000000..84294b66b --- /dev/null +++ b/arch/x86/include/asm/proto.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PROTO_H +#define _ASM_X86_PROTO_H + +#include + +struct task_struct; + +/* misc architecture specific prototypes */ + +void syscall_init(void); + +#ifdef CONFIG_X86_64 +void entry_SYSCALL_64(void); +void entry_SYSCALL_64_safe_stack(void); +void entry_SYSRETQ_unsafe_stack(void); +void entry_SYSRETQ_end(void); +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); +#endif + +#ifdef CONFIG_X86_32 +void entry_INT80_32(void); +void entry_SYSENTER_32(void); +void __begin_SYSENTER_singlestep_region(void); +void __end_SYSENTER_singlestep_region(void); +#endif + +#ifdef CONFIG_IA32_EMULATION +void entry_SYSENTER_compat(void); +void __end_entry_SYSENTER_compat(void); +void entry_SYSCALL_compat(void); +void entry_SYSCALL_compat_safe_stack(void); +void entry_SYSRETL_compat_unsafe_stack(void); +void entry_SYSRETL_compat_end(void); +#endif + +void x86_configure_nx(void); + +extern int reboot_force; + +long do_arch_prctl_common(int option, unsigned long arg2); + +#endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 000000000..07375b476 --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +extern void pti_finalize(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h new file mode 100644 index 000000000..f4db78b09 --- /dev/null +++ b/arch/x86/include/asm/ptrace.h @@ -0,0 +1,395 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PTRACE_H +#define _ASM_X86_PTRACE_H + +#include +#include +#include + +#ifndef __ASSEMBLY__ +#ifdef __i386__ + +struct pt_regs { + /* + * NB: 32-bit x86 CPUs are inconsistent as what happens in the + * following cases (where %seg represents a segment register): + * + * - pushl %seg: some do a 16-bit write and leave the high + * bits alone + * - movl %seg, [mem]: some do a 16-bit write despite the movl + * - IDT entry: some (e.g. 486) will leave the high bits of CS + * and (if applicable) SS undefined. + * + * Fortunately, x86-32 doesn't read the high bits on POP or IRET, + * so we can just treat all of the segment registers as 16-bit + * values. + */ + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned short ds; + unsigned short __dsh; + unsigned short es; + unsigned short __esh; + unsigned short fs; + unsigned short __fsh; + /* + * On interrupt, gs and __gsh store the vector number. They never + * store gs any more. + */ + unsigned short gs; + unsigned short __gsh; + /* On interrupt, this is the error code. */ + unsigned long orig_ax; + unsigned long ip; + unsigned short cs; + unsigned short __csh; + unsigned long flags; + unsigned long sp; + unsigned short ss; + unsigned short __ssh; +}; + +#else /* __i386__ */ + +struct pt_regs { +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; +/* These regs are callee-clobbered. Always saved on kernel entry. */ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ + unsigned long orig_ax; +/* Return frame for iretq */ + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +/* top of stack page */ +}; + +#endif /* !__i386__ */ + +#ifdef CONFIG_PARAVIRT +#include +#endif + +#include + +struct cpuinfo_x86; +struct task_struct; + +extern unsigned long profile_pc(struct pt_regs *regs); + +extern unsigned long +convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); +extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); + + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->ax; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + +/* + * user_mode(regs) determines whether a register set came from user + * mode. On x86_32, this is true if V8086 mode was enabled OR if the + * register set was from protected mode with RPL-3 CS value. This + * tricky test checks that with one comparison. + * + * On x86_64, vm86 mode is mercifully nonexistent, and we don't need + * the extra check. + */ +static __always_inline int user_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; +#else + return !!(regs->cs & 3); +#endif +} + +static __always_inline int v8086_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (regs->flags & X86_VM_MASK); +#else + return 0; /* No V86 mode support in long mode */ +#endif +} + +static inline bool user_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 +#ifndef CONFIG_PARAVIRT_XXL + /* + * On non-paravirt systems, this is the only long mode CPL 3 + * selector. We do not allow long mode selectors in the LDT. + */ + return regs->cs == __USER_CS; +#else + /* Headers are too twisted for this to go in paravirt.h. */ + return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; +#endif +#else /* !CONFIG_X86_64 */ + return false; +#endif +} + +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + +#ifdef CONFIG_X86_64 +#define current_user_stack_pointer() current_pt_regs()->sp +#define compat_user_stack_pointer() current_pt_regs()->sp + +static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETQ_end); +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETL_compat_end); +#endif + + return ret; +} +#endif + +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} + +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->ip = val; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + +static inline void user_stack_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->sp = val; +} + +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return !(regs->flags & X86_EFLAGS_IF); +} + +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; +#ifdef CONFIG_X86_32 + /* The selector fields are 16-bit. */ + if (offset == offsetof(struct pt_regs, cs) || + offset == offsetof(struct pt_regs, ss) || + offset == offsetof(struct pt_regs, ds) || + offset == offsetof(struct pt_regs, es) || + offset == offsetof(struct pt_regs, fs) || + offset == offsetof(struct pt_regs, gs)) { + return *(u16 *)((unsigned long)regs + offset); + + } +#endif + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns the address of the @n th entry of the + * kernel stack which is specified by @regs. If the @n th entry is NOT in + * the kernel stack, this returns NULL. + */ +static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)regs->sp; + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return addr; + else + return NULL; +} + +/* To avoid include hell, we can't include uaccess.h */ +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr; + unsigned long val; + long ret; + + addr = regs_get_kernel_stack_nth_addr(regs, n); + if (addr) { + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); + if (!ret) + return val; + } + return 0; +} + +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_argument() returns @n th argument of the function call. + * Note that this chooses most probably assignment, in some case + * it can be incorrect. + * This is expected to be called from kprobes or ftrace with regs + * where the top of stack is the return address. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, + unsigned int n) +{ + static const unsigned int argument_offs[] = { +#ifdef __i386__ + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), +#define NR_REG_ARGUMENTS 3 +#else + offsetof(struct pt_regs, di), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), +#define NR_REG_ARGUMENTS 6 +#endif + }; + + if (n >= NR_REG_ARGUMENTS) { + n -= NR_REG_ARGUMENTS - 1; + return regs_get_kernel_stack_nth(regs, n); + } else + return regs_get_register(regs, argument_offs[n]); +} + +#define arch_has_single_step() (1) +#ifdef CONFIG_X86_DEBUGCTLMSR +#define arch_has_block_step() (1) +#else +#define arch_has_block_step() (boot_cpu_data.x86 >= 6) +#endif + +#define ARCH_HAS_USER_SINGLE_STEP_REPORT + +struct user_desc; +extern int do_get_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info); +extern int do_set_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info, int can_allocate); + +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000000000..5528e9325 --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h new file mode 100644 index 000000000..1436226ef --- /dev/null +++ b/arch/x86/include/asm/pvclock-abi.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PVCLOCK_ABI_H +#define _ASM_X86_PVCLOCK_ABI_H +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. + */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#define PVCLOCK_TSC_STABLE_BIT (1 << 0) +#define PVCLOCK_GUEST_STOPPED (1 << 1) +/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ +#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h new file mode 100644 index 000000000..19b695ff2 --- /dev/null +++ b/arch/x86/include/asm/pvclock.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PVCLOCK_H +#define _ASM_X86_PVCLOCK_H + +#include +#include + +/* some helper functions for xen and kvm pv clock sources */ +u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); +void pvclock_set_flags(u8 flags); +unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); +void pvclock_read_wallclock(struct pvclock_wall_clock *wall, + struct pvclock_vcpu_time_info *vcpu, + struct timespec64 *ts); +void pvclock_resume(void); + +void pvclock_touch_watchdogs(void); + +static __always_inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + virt_rmb(); + return version; +} + +static __always_inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + virt_rmb(); + return unlikely(version != src->version); +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#else + ulong tmp; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif defined(__x86_64__) + __asm__ ( + "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a"(product), + [hi]"=d"(tmp) + : "0"(delta), + [mul_frac]"rm"((u64)mul_frac)); +#else +#error implement me! +#endif + + return product; +} + +static __always_inline +u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc) +{ + u64 delta = tsc - src->tsc_timestamp; + u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; +} + +struct pvclock_vsyscall_time_info { + struct pvclock_vcpu_time_info pvti; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) + +#ifdef CONFIG_PARAVIRT_CLOCK +void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti); +struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void); +#else +static inline struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) +{ + return NULL; +} +#endif + +#endif /* _ASM_X86_PVCLOCK_H */ diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h new file mode 100644 index 000000000..8656b5a6e --- /dev/null +++ b/arch/x86/include/asm/qrwlock.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_QRWLOCK_H +#define _ASM_X86_QRWLOCK_H + +#include +#include + +#endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h new file mode 100644 index 000000000..d87451df4 --- /dev/null +++ b/arch/x86/include/asm/qspinlock.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_QSPINLOCK_H +#define _ASM_X86_QSPINLOCK_H + +#include +#include +#include +#include +#include + +#define _Q_PENDING_LOOPS (1 << 9) + +#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + u32 val; + + /* + * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto + * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a + * statement expression, which GCC doesn't like. + */ + val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, + "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; + val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; + + return val; +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); +extern bool nopvspin; + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + kcsan_release(); + pv_queued_spin_unlock(lock); +} + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(long cpu) +{ + return pv_vcpu_is_preempted(cpu); +} +#endif + +#ifdef CONFIG_PARAVIRT +/* + * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * + * Native (and PV wanting native due to vCPU pinning) should disable this key. + * It is done in this backwards fashion to only have a single direction change, + * which removes ordering between native_pv_spin_init() and HV setup. + */ +DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void native_pv_lock_init(void) __init; + +/* + * Shortcut for the queued_spin_lock_slowpath() function that allows + * virt to hijack it. + * + * Returns: + * true - lock has been negotiated, all done; + * false - queued_spin_lock_slowpath() will do its thing. + */ +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + if (!static_branch_likely(&virt_spin_lock_key)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); + + return true; +} +#else +static inline void native_pv_lock_init(void) +{ +} +#endif /* CONFIG_PARAVIRT */ + +#include + +#endif /* _ASM_X86_QSPINLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000..dbb38a6b4 --- /dev/null +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +#include + +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. + */ +#ifdef CONFIG_64BIT + +__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code. Note that it puts the code in the .spinlock.text section which + * is equivalent to adding __lockfunc in the C code: + * + * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval == _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi = lock (first argument) + * rsi = lockval (second argument) + * rdx = internal variable (set to 0) + */ +asm (".pushsection .spinlock.text, \"ax\";" + ".globl " PV_UNLOCK ";" + ".type " PV_UNLOCK ", @function;" + ".align 4,0x90;" + PV_UNLOCK ": " + ASM_ENDBR + FRAME_BEGIN + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + FRAME_END + ASM_RET + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + FRAME_END + ASM_RET + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock); +__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text"); + +#endif /* CONFIG_64BIT */ +#endif diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h new file mode 100644 index 000000000..a336feef0 --- /dev/null +++ b/arch/x86/include/asm/realmode.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_X86_REALMODE_H +#define _ARCH_X86_REALMODE_H + +/* + * Flag bit definitions for use with the flags field of the trampoline header + * in the CONFIG_X86_64 variant. + */ +#define TH_FLAGS_SME_ACTIVE_BIT 0 +#define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT) + +#ifndef __ASSEMBLY__ + +#include +#include + +/* This must match data at realmode/rm/header.S */ +struct real_mode_header { + u32 text_start; + u32 ro_end; + /* SMP trampoline */ + u32 trampoline_start; + u32 trampoline_header; +#ifdef CONFIG_AMD_MEM_ENCRYPT + u32 sev_es_trampoline_start; +#endif +#ifdef CONFIG_X86_64 + u32 trampoline_start64; + u32 trampoline_pgd; +#endif + /* ACPI S3 wakeup */ +#ifdef CONFIG_ACPI_SLEEP + u32 wakeup_start; + u32 wakeup_header; +#endif + /* APM/BIOS reboot */ + u32 machine_real_restart_asm; +#ifdef CONFIG_X86_64 + u32 machine_real_restart_seg; +#endif +}; + +/* This must match data at realmode/rm/trampoline_{32,64}.S */ +struct trampoline_header { +#ifdef CONFIG_X86_32 + u32 start; + u16 gdt_pad; + u16 gdt_limit; + u32 gdt_base; +#else + u64 start; + u64 efer; + u32 cr4; + u32 flags; +#endif +}; + +extern struct real_mode_header *real_mode_header; +extern unsigned char real_mode_blob_end[]; + +extern unsigned long initial_code; +extern unsigned long initial_gs; +extern unsigned long initial_stack; +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern unsigned long initial_vc_handler; +#endif + +extern unsigned char real_mode_blob[]; +extern unsigned char real_mode_relocs[]; + +#ifdef CONFIG_X86_32 +extern unsigned char startup_32_smp[]; +extern unsigned char boot_gdt[]; +#else +extern unsigned char secondary_startup_64[]; +extern unsigned char secondary_startup_64_no_verify[]; +#endif + +static inline size_t real_mode_size_needed(void) +{ + if (real_mode_header) + return 0; /* already allocated. */ + + return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); +} + +static inline void set_real_mode_mem(phys_addr_t mem) +{ + real_mode_header = (struct real_mode_header *) __va(mem); +} + +void reserve_real_mode(void); +void load_trampoline_pgtable(void); +void init_real_mode(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h new file mode 100644 index 000000000..2551baec9 --- /dev/null +++ b/arch/x86/include/asm/reboot.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_REBOOT_H +#define _ASM_X86_REBOOT_H + +#include + +struct pt_regs; + +struct machine_ops { + void (*restart)(char *cmd); + void (*halt)(void); + void (*power_off)(void); + void (*shutdown)(void); + void (*crash_shutdown)(struct pt_regs *); + void (*emergency_restart)(void); +}; + +extern struct machine_ops machine_ops; +extern int crashing_cpu; + +void native_machine_crash_shutdown(struct pt_regs *regs); +void native_machine_shutdown(void); +void __noreturn machine_real_restart(unsigned int type); +/* These must match dispatch in arch/x86/realmore/rm/reboot.S */ +#define MRR_BIOS 0 +#define MRR_APM 1 + +typedef void crash_vmclear_fn(void); +extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; +void cpu_emergency_disable_virtualization(void); + +typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); +void nmi_panic_self_stop(struct pt_regs *regs); +void nmi_shootdown_cpus(nmi_shootdown_cb callback); +void run_crash_ipi_callback(struct pt_regs *regs); + +#endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/include/asm/reboot_fixups.h b/arch/x86/include/asm/reboot_fixups.h new file mode 100644 index 000000000..96515658c --- /dev/null +++ b/arch/x86/include/asm/reboot_fixups.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_REBOOT_FIXUPS_H +#define _ASM_X86_REBOOT_FIXUPS_H + +extern void mach_reboot_fixups(void); + +#endif /* _ASM_X86_REBOOT_FIXUPS_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h new file mode 100644 index 000000000..7ba1726b7 --- /dev/null +++ b/arch/x86/include/asm/required-features.h @@ -0,0 +1,104 @@ +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#define _ASM_X86_REQUIRED_FEATURES_H + +/* Define minimum CPUID feature set for kernel These bits are checked + really early to actually display a visible error message before the + kernel dies. Make sure to assign features to the proper mask! + + Some requirements that are not in CPUID yet are also in the + CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. + + The real information is in arch/x86/Kconfig.cpu, this just converts + the CONFIGs into a bitmask */ + +#ifndef CONFIG_MATH_EMULATION +# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) +#else +# define NEED_FPU 0 +#endif + +#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) +# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) +#else +# define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else +# define NEED_CX8 0 +#endif + +#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) +# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) +#else +# define NEED_CMOV 0 +#endif + +# define NEED_3DNOW 0 + +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + +#ifdef CONFIG_MATOM +# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) +#else +# define NEED_MOVBE 0 +#endif + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT_XXL +/* Paravirtualized systems may not have PSE or PGE available */ +#define NEED_PSE 0 +#define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) +#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) +#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) +#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) +#define NEED_LM (1<<(X86_FEATURE_LM & 31)) +#else +#define NEED_PSE 0 +#define NEED_MSR 0 +#define NEED_PGE 0 +#define NEED_FXSR 0 +#define NEED_XMM 0 +#define NEED_XMM2 0 +#define NEED_LM 0 +#endif + +#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ + NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ + NEED_XMM|NEED_XMM2) +#define SSE_MASK (NEED_XMM|NEED_XMM2) + +#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) + +#define REQUIRED_MASK2 0 +#define REQUIRED_MASK3 (NEED_NOPL) +#define REQUIRED_MASK4 (NEED_MOVBE) +#define REQUIRED_MASK5 0 +#define REQUIRED_MASK6 0 +#define REQUIRED_MASK7 0 +#define REQUIRED_MASK8 0 +#define REQUIRED_MASK9 0 +#define REQUIRED_MASK10 0 +#define REQUIRED_MASK11 0 +#define REQUIRED_MASK12 0 +#define REQUIRED_MASK13 0 +#define REQUIRED_MASK14 0 +#define REQUIRED_MASK15 0 +#define REQUIRED_MASK16 0 +#define REQUIRED_MASK17 0 +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK20 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) + +#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h new file mode 100644 index 000000000..a4641d68e --- /dev/null +++ b/arch/x86/include/asm/resctrl.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H + +#ifdef CONFIG_X86_CPU_RESCTRL + +#include +#include + +#define IA32_PQR_ASSOC 0x0c8f + +/** + * struct resctrl_pqr_state - State cache for the PQR MSR + * @cur_rmid: The cached Resource Monitoring ID + * @cur_closid: The cached Class Of Service ID + * @default_rmid: The user assigned Resource Monitoring ID + * @default_closid: The user assigned cached Class Of Service ID + * + * The upper 32 bits of IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. The update to IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. This also + * stores the user configured per cpu CLOSID and RMID. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct resctrl_pqr_state { + u32 cur_rmid; + u32 cur_closid; + u32 default_rmid; + u32 default_closid; +}; + +DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); + +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); + +/* + * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR + * + * Following considerations are made so that this has minimal impact + * on scheduler hot path: + * - This will stay as no-op unless we are running on an Intel SKU + * which supports resource control or monitoring and we enable by + * mounting the resctrl file system. + * - Caches the per cpu CLOSid/RMID values and does the MSR write only + * when a task with a different CLOSid/RMID is scheduled in. + * - We allocate RMIDs/CLOSids globally in order to keep this as + * simple as possible. + * Must be called with preemption disabled. + */ +static inline void __resctrl_sched_in(struct task_struct *tsk) +{ + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = state->default_closid; + u32 rmid = state->default_rmid; + u32 tmp; + + /* + * If this task has a closid/rmid assigned, use it. + * Else use the closid/rmid assigned to this cpu. + */ + if (static_branch_likely(&rdt_alloc_enable_key)) { + tmp = READ_ONCE(tsk->closid); + if (tmp) + closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { + tmp = READ_ONCE(tsk->rmid); + if (tmp) + rmid = tmp; + } + + if (closid != state->cur_closid || rmid != state->cur_rmid) { + state->cur_closid = closid; + state->cur_rmid = rmid; + wrmsr(IA32_PQR_ASSOC, rmid, closid); + } +} + +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + unsigned int scale = boot_cpu_data.x86_cache_occ_scale; + + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ + val /= scale; + return val * scale; +} + +static inline void resctrl_sched_in(struct task_struct *tsk) +{ + if (static_branch_likely(&rdt_enable_key)) + __resctrl_sched_in(tsk); +} + +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + +#else + +static inline void resctrl_sched_in(struct task_struct *tsk) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} + +#endif /* CONFIG_X86_CPU_RESCTRL */ + +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h new file mode 100644 index 000000000..7fa611216 --- /dev/null +++ b/arch/x86/include/asm/rmwcc.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RMWcc +#define _ASM_X86_RMWcc + +/* This counts to 12. Any more, it will return 13th argument. */ +#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __RMWcc_CONCAT(a, b) a ## b +#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) + +#define __CLOBBERS_MEM(clb...) "memory", ## clb + +#ifndef __GCC_ASM_FLAG_OUTPUTS__ + +/* Use asm goto */ + +#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ +({ \ + bool c = false; \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + : : [var] "m" (_var), ## __VA_ARGS__ \ + : clobbers : cc_label); \ + if (0) { \ +cc_label: c = true; \ + } \ + c; \ +}) + +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ + +/* Use flags output or a set instruction */ + +#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ +({ \ + bool c; \ + asm volatile (fullop CC_SET(cc) \ + : [var] "+m" (_var), CC_OUT(cc) (c) \ + : __VA_ARGS__ : clobbers); \ + c; \ +}) + +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ + +#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) + +#define GEN_UNARY_RMWcc_3(op, var, cc) \ + GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") + +#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) + +#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ + __GEN_RMWcc(op " %[val], " arg0, var, cc, \ + __CLOBBERS_MEM(), [val] vcon (_val)) + +#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ + GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") + +#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) + +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \ + __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ + __CLOBBERS_MEM(clobbers)) + +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, cc, vcon, _val, clobbers...)\ + __GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc, \ + __CLOBBERS_MEM(clobbers), [val] vcon (_val)) + +#endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h new file mode 100644 index 000000000..fef16e398 --- /dev/null +++ b/arch/x86/include/asm/seccomp.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SECCOMP_H +#define _ASM_X86_SECCOMP_H + +#include + +#ifdef CONFIG_X86_32 +#define __NR_seccomp_sigreturn __NR_sigreturn +#endif + +#ifdef CONFIG_COMPAT +#include +#define __NR_seccomp_read_32 __NR_ia32_read +#define __NR_seccomp_write_32 __NR_ia32_write +#define __NR_seccomp_exit_32 __NR_ia32_exit +#define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn +#endif + +#ifdef CONFIG_X86_64 +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_X86_64 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "x86_64" +# ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_I386 +# define SECCOMP_ARCH_COMPAT_NR IA32_NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "ia32" +# endif +/* + * x32 will have __X32_SYSCALL_BIT set in syscall number. We don't support + * caching them and they are treated as out of range syscalls, which will + * always pass through the BPF filter. + */ +#else /* !CONFIG_X86_64 */ +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_I386 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "ia32" +#endif + +#include + +#endif /* _ASM_X86_SECCOMP_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h new file mode 100644 index 000000000..3fa87e5e1 --- /dev/null +++ b/arch/x86/include/asm/sections.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SECTIONS_H +#define _ASM_X86_SECTIONS_H + +#include +#include + +extern char __brk_base[], __brk_limit[]; +extern char __end_rodata_aligned[]; + +#if defined(CONFIG_X86_64) +extern char __end_rodata_hpage_align[]; +#endif + +extern char __end_of_kernel_reserve[]; + +extern unsigned long _brk_start, _brk_end; + +#endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h new file mode 100644 index 000000000..2e7890dd5 --- /dev/null +++ b/arch/x86/include/asm/segment.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SEGMENT_H +#define _ASM_X86_SEGMENT_H + +#include +#include +#include + +/* + * Constructor for a conventional segment GDT (or LDT) entry. + * This is a macro so it can be used in initializers. + */ +#define GDT_ENTRY(flags, base, limit) \ + ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ + (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ + (((limit) & _AC(0x000f0000,ULL)) << (48-16)) | \ + (((base) & _AC(0x00ffffff,ULL)) << 16) | \ + (((limit) & _AC(0x0000ffff,ULL)))) + +/* Simple and small GDT entries for booting only: */ + +#define GDT_ENTRY_BOOT_CS 2 +#define GDT_ENTRY_BOOT_DS 3 +#define GDT_ENTRY_BOOT_TSS 4 +#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8) +#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8) +#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8) + +/* + * Bottom two bits of selector give the ring + * privilege level + */ +#define SEGMENT_RPL_MASK 0x3 + +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + +/* User mode is privilege level 3: */ +#define USER_RPL 0x3 + +/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */ +#define SEGMENT_TI_MASK 0x4 +/* LDT segment has TI set ... */ +#define SEGMENT_LDT 0x4 +/* ... GDT has it cleared */ +#define SEGMENT_GDT 0x0 + +#define GDT_ENTRY_INVALID_SEG 0 + +#ifdef CONFIG_X86_32 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null <=== cacheline #1 + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - unused <=== cacheline #2 + * 5 - unused + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 <=== cacheline #3 + * 9 - reserved + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <=== cacheline #4 + * 13 - kernel data segment + * 14 - default user CS + * 15 - default user DS + * 16 - TSS <=== cacheline #5 + * 17 - LDT + * 18 - PNPBIOS support (16->32 gate) + * 19 - PNPBIOS support + * 20 - PNPBIOS support <=== cacheline #6 + * 21 - PNPBIOS support + * 22 - PNPBIOS support + * 23 - APM BIOS support + * 24 - APM BIOS support <=== cacheline #7 + * 25 - APM BIOS support + * + * 26 - ESPFIX small SS + * 27 - per-cpu [ offset to per-cpu data area ] + * 28 - unused + * 29 - unused + * 30 - unused + * 31 - TSS for double fault handler + */ +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) + +#define GDT_ENTRY_KERNEL_CS 12 +#define GDT_ENTRY_KERNEL_DS 13 +#define GDT_ENTRY_DEFAULT_USER_CS 14 +#define GDT_ENTRY_DEFAULT_USER_DS 15 +#define GDT_ENTRY_TSS 16 +#define GDT_ENTRY_LDT 17 +#define GDT_ENTRY_PNPBIOS_CS32 18 +#define GDT_ENTRY_PNPBIOS_CS16 19 +#define GDT_ENTRY_PNPBIOS_DS 20 +#define GDT_ENTRY_PNPBIOS_TS1 21 +#define GDT_ENTRY_PNPBIOS_TS2 22 +#define GDT_ENTRY_APMBIOS_BASE 23 + +#define GDT_ENTRY_ESPFIX_SS 26 +#define GDT_ENTRY_PERCPU 27 + +#define GDT_ENTRY_DOUBLEFAULT_TSS 31 + +/* + * Number of entries in the GDT table: + */ +#define GDT_ENTRIES 32 + +/* + * Segment selector values corresponding to the above entries: + */ + +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) + +/* segment for calling fn: */ +#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8) +/* code segment for BIOS: */ +#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8) + +/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32) + +/* data segment for BIOS: */ +#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8) +/* transfer data segment: */ +#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8) +/* another data segment: */ +#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8) + +#ifdef CONFIG_SMP +# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8) +#else +# define __KERNEL_PERCPU 0 +#endif + +#else /* 64-bit: */ + +#include + +#define GDT_ENTRY_KERNEL32_CS 1 +#define GDT_ENTRY_KERNEL_CS 2 +#define GDT_ENTRY_KERNEL_DS 3 + +/* + * We cannot use the same code segment descriptor for user and kernel mode, + * not even in long flat mode, because of different DPL. + * + * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes + * selectors: + * + * if returning to 32-bit userspace: cs = STAR.SYSRET_CS, + * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16, + * + * ss = STAR.SYSRET_CS+8 (in either case) + * + * thus USER_DS should be between 32-bit and 64-bit code selectors: + */ +#define GDT_ENTRY_DEFAULT_USER32_CS 4 +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define GDT_ENTRY_DEFAULT_USER_CS 6 + +/* Needs two entries */ +#define GDT_ENTRY_TSS 8 +/* Needs two entries */ +#define GDT_ENTRY_LDT 10 + +#define GDT_ENTRY_TLS_MIN 12 +#define GDT_ENTRY_TLS_MAX 14 + +#define GDT_ENTRY_CPUNODE 15 + +/* + * Number of entries in the GDT table: + */ +#define GDT_ENTRIES 16 + +/* + * Segment selector values corresponding to the above entries: + * + * Note, selectors also need to have a correct RPL, + * expressed with the +3 value for user-space selectors: + */ +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) +#define __USER32_DS __USER_DS +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) + +#endif + +#define IDT_ENTRIES 256 +#define NUM_EXCEPTION_VECTORS 32 + +/* Bitmask of exception vectors which push an error code on the stack: */ +#define EXCEPTION_ERRCODE_MASK 0x20027d00 + +#define GDT_SIZE (GDT_ENTRIES*8) +#define GDT_ENTRY_TLS_ENTRIES 3 +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) + +#ifdef CONFIG_X86_64 + +/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ +#define VDSO_CPUNODE_BITS 12 +#define VDSO_CPUNODE_MASK 0xfff + +#ifndef __ASSEMBLY__ + +/* Helper functions to store/load CPU and node numbers */ + +static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) +{ + return (node << VDSO_CPUNODE_BITS) | cpu; +} + +static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) +{ + unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP + * and works on all CPUs. This is volatile so that it orders + * correctly with respect to barrier() and to keep GCC from cleverly + * hoisting it out of the calling function. + * + * If RDPID is available, use it. + */ + alternative_io ("lsl %[seg],%[p]", + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, + [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); + if (node) + *node = (p >> VDSO_CPUNODE_BITS); +} + +#endif /* !__ASSEMBLY__ */ +#endif /* CONFIG_X86_64 */ + +#ifdef __KERNEL__ + +/* + * early_idt_handler_array is an array of entry points referenced in the + * early IDT. For simplicity, it's a real array with one entry point + * every nine bytes. That leaves room for an optional 'push $0' if the + * vector has no error code (two bytes), a 'push $vector_number' (two + * bytes), and a jump to the common entry code (up to five bytes). + */ +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) + +/* + * xen_early_idt_handler_array is for Xen pv guests: for each entry in + * early_idt_handler_array it contains a prequel in the form of + * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to + * max 8 bytes. + */ +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) + +#ifndef __ASSEMBLY__ + +extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; +extern void early_ignore_irq(void); + +#ifdef CONFIG_XEN_PV +extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; +#endif + +/* + * Load a segment. Fall back on loading the zero segment if something goes + * wrong. This variant assumes that loading zero fully clears the segment. + * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any + * failure to fully clear the cached descriptor is only observable for + * FS and GS. + */ +#define __loadsegment_simple(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\ + : "+r" (__val) : : "memory"); \ +} while (0) + +#define __loadsegment_ss(value) __loadsegment_simple(ss, (value)) +#define __loadsegment_ds(value) __loadsegment_simple(ds, (value)) +#define __loadsegment_es(value) __loadsegment_simple(es, (value)) + +#ifdef CONFIG_X86_32 + +/* + * On 32-bit systems, the hidden parts of FS and GS are unobservable if + * the selector is NULL, so there's no funny business here. + */ +#define __loadsegment_fs(value) __loadsegment_simple(fs, (value)) +#define __loadsegment_gs(value) __loadsegment_simple(gs, (value)) + +#else + +static inline void __loadsegment_fs(unsigned short value) +{ + asm volatile(" \n" + "1: movw %0, %%fs \n" + "2: \n" + + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) + + : : "rm" (value) : "memory"); +} + +/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */ + +#endif + +#define loadsegment(seg, value) __loadsegment_ ## seg (value) + +/* + * Save a segment register away: + */ +#define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h new file mode 100644 index 000000000..ece8299d2 --- /dev/null +++ b/arch/x86/include/asm/serial.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SERIAL_H +#define _ASM_X86_SERIAL_H + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires a faster clock. + */ +#define BASE_BAUD (1843200/16) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_8250_DETECT_IRQ +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | UPF_AUTO_IRQ) +#else +# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | 0 ) +#endif + +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { .uart = 0, BASE_BAUD, 0x3F8, 4, STD_COMX_FLAGS }, /* ttyS0 */ \ + { .uart = 0, BASE_BAUD, 0x2F8, 3, STD_COMX_FLAGS }, /* ttyS1 */ \ + { .uart = 0, BASE_BAUD, 0x3E8, 4, STD_COMX_FLAGS }, /* ttyS2 */ \ + { .uart = 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ + +#endif /* _ASM_X86_SERIAL_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h new file mode 100644 index 000000000..b45c4d27f --- /dev/null +++ b/arch/x86/include/asm/set_memory.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SET_MEMORY_H +#define _ASM_X86_SET_MEMORY_H + +#include +#include +#include + +/* + * The set_memory_* API can be used to change various attributes of a virtual + * address range. The attributes include: + * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack + * Executability : eXecutable, NoteXecutable + * Read/Write : ReadOnly, ReadWrite + * Presence : NotPresent + * Encryption : Encrypted, Decrypted + * + * Within a category, the attributes are mutually exclusive. + * + * The implementation of this API will take care of various aspects that + * are associated with changing such attributes, such as: + * - Flushing TLBs + * - Flushing CPU caches + * - Making sure aliases of the memory behind the mapping don't violate + * coherency rules as defined by the CPU in the system. + * + * What this API does not do: + * - Provide exclusion between various callers - including callers that + * operation on other mappings of the same physical page + * - Restore default attributes when a page is freed + * - Guarantee that mappings other than the requested one are + * in any state, other than that these do not violate rules for + * the CPU you have. Do not depend on any effects on other mappings, + * CPUs other than the one you have may have more relaxed rules. + * The caller is required to take care of these. + */ + +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); +int _set_memory_uc(unsigned long addr, int numpages); +int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wt(unsigned long addr, int numpages); +int _set_memory_wb(unsigned long addr, int numpages); +int set_memory_uc(unsigned long addr, int numpages); +int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wb(unsigned long addr, int numpages); +int set_memory_np(unsigned long addr, int numpages); +int set_memory_4k(unsigned long addr, int numpages); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); +int set_memory_np_noalias(unsigned long addr, int numpages); +int set_memory_nonglobal(unsigned long addr, int numpages); +int set_memory_global(unsigned long addr, int numpages); + +int set_pages_array_uc(struct page **pages, int addrinarray); +int set_pages_array_wc(struct page **pages, int addrinarray); +int set_pages_array_wb(struct page **pages, int addrinarray); + +/* + * For legacy compatibility with the old APIs, a few functions + * are provided that work on a "struct page". + * These functions operate ONLY on the 1:1 kernel mapping of the + * memory that the struct page represents, and internally just + * call the set_memory_* function. See the description of the + * set_memory_* function for more details on conventions. + * + * These APIs should be considered *deprecated* and are likely going to + * be removed in the future. + * The reason for this is the implicit operation on the 1:1 mapping only, + * making this not a generally useful API. + * + * Specifically, many users of the old APIs had a virtual address, + * called virt_to_page() or vmalloc_to_page() on that address to + * get a struct page* that the old API required. + * To convert these cases, use set_memory_*() on the original + * virtual address, do not use these functions. + */ + +int set_pages_uc(struct page *page, int numpages); +int set_pages_wb(struct page *page, int numpages); +int set_pages_ro(struct page *page, int numpages); +int set_pages_rw(struct page *page, int numpages); + +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); +bool kernel_page_present(struct page *page); + +extern int kernel_set_to_readonly; + +#endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h new file mode 100644 index 000000000..f37cbff73 --- /dev/null +++ b/arch/x86/include/asm/setup.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SETUP_H +#define _ASM_X86_SETUP_H + +#include + +#define COMMAND_LINE_SIZE 2048 + +#include +#include +#include + +#ifdef __i386__ + +#include +/* + * Reserved space for vmalloc and iomap - defined in asm/page.h + */ +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + +#endif /* __i386__ */ + +#define PARAM_SIZE 4096 /* sizeof(struct boot_params) */ + +#define OLD_CL_MAGIC 0xA33F +#define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ +#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ + +#ifndef __ASSEMBLY__ +#include +#include + +extern u64 relocated_ramdisk; + +/* Interrupt control for vSMPowered x86_64 systems */ +#ifdef CONFIG_X86_64 +void vsmp_init(void); +#else +static inline void vsmp_init(void) { } +#endif + +struct pt_regs; + +void setup_bios_corruption_check(void); +void early_platform_quirks(void); + +extern unsigned long saved_video_mode; + +extern void reserve_standard_io_resources(void); +extern void i386_reserve_resources(void); +extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); +extern void startup_64_setup_env(unsigned long physbase); +extern void early_setup_idt(void); +extern void __init do_early_exception(struct pt_regs *regs, int trapnr); + +#ifdef CONFIG_X86_INTEL_MID +extern void x86_intel_mid_early_setup(void); +#else +static inline void x86_intel_mid_early_setup(void) { } +#endif + +#ifdef CONFIG_X86_INTEL_CE +extern void x86_ce4100_early_setup(void); +#else +static inline void x86_ce4100_early_setup(void) { } +#endif + +#ifndef _SETUP + +#include +#include + +/* + * This is set up by the setup-routine at boot-time + */ +extern struct boot_params boot_params; +extern char _text[]; + +static inline bool kaslr_enabled(void) +{ + return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) && + !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); +} + +static inline unsigned long kaslr_offset(void) +{ + return (unsigned long)&_text - __START_KERNEL; +} + +/* + * Do NOT EVER look at the BIOS memory size location. + * It does not work on many machines. + */ +#define LOWMEMSIZE() (0x9f000) + +/* exceedingly early brk-like allocator */ +extern unsigned long _brk_end; +void *extend_brk(size_t size, size_t align); + +/* + * Reserve space in the .brk section, which is a block of memory from which the + * caller is allowed to allocate very early (before even memblock is available) + * by calling extend_brk(). All allocated memory will be eventually converted + * to memblock. Any leftover unallocated memory will be freed. + * + * The size is in bytes. + */ +#define RESERVE_BRK(name, size) \ + __section(".bss..brk") __aligned(1) __used \ + static char __brk_##name[size] + +extern void probe_roms(void); + +void clear_bss(void); + +#ifdef __i386__ + +asmlinkage void __init i386_start_kernel(void); + +#else +asmlinkage void __init x86_64_start_kernel(char *real_mode); +asmlinkage void __init x86_64_start_reservations(char *real_mode_data); + +#endif /* __i386__ */ +#endif /* _SETUP */ + +#else /* __ASSEMBLY */ + +.macro __RESERVE_BRK name, size + .pushsection .bss..brk, "aw" +SYM_DATA_START(__brk_\name) + .skip \size +SYM_DATA_END(__brk_\name) + .popsection +.endm + +#define RESERVE_BRK(name, size) __RESERVE_BRK name, size + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/setup_arch.h b/arch/x86/include/asm/setup_arch.h new file mode 100644 index 000000000..38846208b --- /dev/null +++ b/arch/x86/include/asm/setup_arch.h @@ -0,0 +1,3 @@ +/* Hook to call BIOS initialisation function */ + +/* no action for generic */ diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000..0759af9b1 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. + * + * Author: Brijesh Singh + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) + +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 + +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + /* GHCBData[63:48] */ \ + ((((_max) & 0xffff) << 48) | \ + /* GHCBData[47:32] */ \ + (((_min) & 0xffff) << 32) | \ + /* GHCBData[31:24] */ \ + (((_cbit) & 0xff) << 24) | \ + GHCB_MSR_SEV_INFO_RESP) + +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff) + +/* CPUID Request/Response */ +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + /* GHCBData[11:0] */ \ + (GHCB_MSR_CPUID_REQ | \ + /* GHCBData[31:12] */ \ + (((unsigned long)(reg) & 0x3) << 30) | \ + /* GHCBData[63:32] */ \ + (((unsigned long)fn) << 32)) + +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 + +/* GHCB GPA Register */ +#define GHCB_MSR_REG_GPA_REQ 0x012 +#define GHCB_MSR_REG_GPA_REQ_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_REG_GPA_REQ) + +#define GHCB_MSR_REG_GPA_RESP 0x013 +#define GHCB_MSR_REG_GPA_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +/* + * SNP Page State Change Operation + * + * GHCBData[55:52] - Page operation: + * 0x0001 Page assignment, Private + * 0x0002 Page assignment, Shared + */ +enum psc_op { + SNP_PAGE_STATE_PRIVATE = 1, + SNP_PAGE_STATE_SHARED, +}; + +#define GHCB_MSR_PSC_REQ 0x014 +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ + /* GHCBData[55:52] */ \ + (((u64)((op) & 0xf) << 52) | \ + /* GHCBData[51:12] */ \ + ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_PSC_REQ) + +#define GHCB_MSR_PSC_RESP 0x015 +#define GHCB_MSR_PSC_RESP_VAL(val) \ + /* GHCBData[63:32] */ \ + (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +#define GHCB_HV_FT_SNP BIT_ULL(0) +#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) + +/* SNP Page State Change NAE event */ +#define VMGEXIT_PSC_MAX_ENTRY 253 + +struct psc_hdr { + u16 cur_entry; + u16 end_entry; + u32 reserved; +} __packed; + +struct psc_entry { + u64 cur_page : 12, + gfn : 40, + operation : 4, + pagesize : 1, + reserved : 7; +} __packed; + +struct snp_psc_desc { + struct psc_hdr hdr; + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; +} __packed; + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff + +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + /* GHCBData[15:12] */ \ + (((((u64)reason_set) & 0xf) << 12) | \ + /* GHCBData[23:16] */ \ + ((((u64)reason_val) & 0xff) << 16)) + +/* Error codes from reason set 0 */ +#define SEV_TERM_SET_GEN 0 +#define GHCB_SEV_ES_GEN_REQ 0 +#define GHCB_SEV_ES_PROT_UNSUPPORTED 1 +#define GHCB_SNP_UNSUPPORTED 2 + +/* Linux-specific reason codes (used with reason set 1) */ +#define SEV_TERM_SET_LINUX 1 +#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */ +#define GHCB_TERM_PSC 1 /* Page State Change failure */ +#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ +#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ +#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ +#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +/* + * Error codes related to GHCB input that can be communicated back to the guest + * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. + */ +#define GHCB_ERR_NOT_REGISTERED 1 +#define GHCB_ERR_INVALID_USAGE 2 +#define GHCB_ERR_INVALID_SCRATCH_AREA 3 +#define GHCB_ERR_MISSING_INPUT 4 +#define GHCB_ERR_INVALID_INPUT 5 +#define GHCB_ERR_INVALID_EVENT 6 + +#endif diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h new file mode 100644 index 000000000..7ca5c9ec8 --- /dev/null +++ b/arch/x86/include/asm/sev.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +#ifndef __ASM_ENCRYPTED_STATE_H +#define __ASM_ENCRYPTED_STATE_H + +#include +#include + +#include +#include +#include + +#define GHCB_PROTOCOL_MIN 1ULL +#define GHCB_PROTOCOL_MAX 2ULL +#define GHCB_DEFAULT_USAGE 0ULL + +#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } + +enum es_result { + ES_OK, /* All good */ + ES_UNSUPPORTED, /* Requested operation not supported */ + ES_VMM_ERROR, /* Unexpected state from the VMM */ + ES_DECODE_FAILED, /* Instruction decoding failed */ + ES_EXCEPTION, /* Instruction caused exception */ + ES_RETRY, /* Retry instruction emulation */ +}; + +struct es_fault_info { + unsigned long vector; + unsigned long error_code; + unsigned long cr2; +}; + +struct pt_regs; + +/* ES instruction emulation context */ +struct es_em_ctxt { + struct pt_regs *regs; + struct insn insn; + struct es_fault_info fi; +}; + +/* + * AMD SEV Confidential computing blob structure. The structure is + * defined in OVMF UEFI firmware header: + * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h + */ +#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41 +struct cc_blob_sev_info { + u32 magic; + u16 version; + u16 reserved; + u64 secrets_phys; + u32 secrets_len; + u32 rsvd1; + u64 cpuid_phys; + u32 cpuid_len; + u32 rsvd2; +} __packed; + +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); + +static inline u64 lower_bits(u64 val, unsigned int bits) +{ + u64 mask = (1ULL << bits) - 1; + + return (val & mask); +} + +struct real_mode_header; +enum stack_type; + +/* Early IDT entry points for #VC handler */ +extern void vc_no_ghcb(void); +extern void vc_boot_ghcb(void); +extern bool handle_vc_boot_ghcb(struct pt_regs *regs); + +/* Software defined (when rFlags.CF = 1) */ +#define PVALIDATE_FAIL_NOUPDATE 255 + +/* RMP page size */ +#define RMP_PG_SIZE_4K 0 + +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) + +/* SNP Guest message request */ +struct snp_req_data { + unsigned long req_gpa; + unsigned long resp_gpa; + unsigned long data_gpa; + unsigned int data_npages; +}; + +struct sev_guest_platform_data { + u64 secrets_gpa; +}; + +/* + * The secrets page contains 96-bytes of reserved field that can be used by + * the guest OS. The guest OS uses the area to save the message sequence + * number for each VMPCK. + * + * See the GHCB spec section Secret page layout for the format for this area. + */ +struct secrets_os_area { + u32 msg_seqno_0; + u32 msg_seqno_1; + u32 msg_seqno_2; + u32 msg_seqno_3; + u64 ap_jump_table_pa; + u8 rsvd[40]; + u8 guest_usage[32]; +} __packed; + +#define VMPCK_KEY_LEN 32 + +/* See the SNP spec version 0.9 for secrets page format */ +struct snp_secrets_page_layout { + u32 version; + u32 imien : 1, + rsvd1 : 31; + u32 fms; + u32 rsvd2; + u8 gosvw[16]; + u8 vmpck0[VMPCK_KEY_LEN]; + u8 vmpck1[VMPCK_KEY_LEN]; + u8 vmpck2[VMPCK_KEY_LEN]; + u8 vmpck3[VMPCK_KEY_LEN]; + struct secrets_os_area os_area; + u8 rsvd3[3840]; +} __packed; + +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern struct static_key_false sev_es_enable_key; +extern void __sev_es_ist_enter(struct pt_regs *regs); +extern void __sev_es_ist_exit(void); +static __always_inline void sev_es_ist_enter(struct pt_regs *regs) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_ist_enter(regs); +} +static __always_inline void sev_es_ist_exit(void) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_ist_exit(); +} +extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); +extern void __sev_es_nmi_complete(void); +static __always_inline void sev_es_nmi_complete(void) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_nmi_complete(); +} +extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); + +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) +{ + int rc; + + /* "rmpadjust" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t" + : "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(attrs) + : "memory", "cc"); + + return rc; +} +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) +{ + bool no_rmpupdate; + int rc; + + /* "pvalidate" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" + CC_SET(c) + : CC_OUT(c) (no_rmpupdate), "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(validate) + : "memory", "cc"); + + if (no_rmpupdate) + return PVALIDATE_FAIL_NOUPDATE; + + return rc; +} + +struct snp_guest_request_ioctl; + +void setup_ghcb(void); +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); +void snp_set_memory_private(unsigned long vaddr, unsigned long npages); +void snp_set_wakeup_secondary_cpu(void); +bool snp_init(struct boot_params *bp); +void __init __noreturn snp_abort(void); +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +#else +static inline void sev_es_ist_enter(struct pt_regs *regs) { } +static inline void sev_es_ist_exit(void) { } +static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } +static inline void sev_es_nmi_complete(void) { } +static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } +static inline void setup_ghcb(void) { } +static inline void __init +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void __init +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_wakeup_secondary_cpu(void) { } +static inline bool snp_init(struct boot_params *bp) { return false; } +static inline void snp_abort(void) { } +static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) +{ + return -ENOTTY; +} +#endif + +#endif diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h new file mode 100644 index 000000000..eae20fa52 --- /dev/null +++ b/arch/x86/include/asm/sgx.h @@ -0,0 +1,404 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * Copyright(c) 2016-20 Intel Corporation. + * + * Intel Software Guard Extensions (SGX) support. + */ +#ifndef _ASM_X86_SGX_H +#define _ASM_X86_SGX_H + +#include +#include + +/* + * This file contains both data structures defined by SGX architecture and Linux + * defined software data structures and functions. The two should not be mixed + * together for better readability. The architectural definitions come first. + */ + +/* The SGX specific CPUID function. */ +#define SGX_CPUID 0x12 +/* EPC enumeration. */ +#define SGX_CPUID_EPC 2 +/* An invalid EPC section, i.e. the end marker. */ +#define SGX_CPUID_EPC_INVALID 0x0 +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +/* The bitmask for the EPC section type. */ +#define SGX_CPUID_EPC_MASK GENMASK(3, 0) + +enum sgx_encls_function { + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, +}; + +/** + * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr + * + * ENCLS has its own (positive value) error codes and also generates + * ENCLS specific #GP and #PF faults. And the ENCLS values get munged + * with system error codes as everything percolates back up the stack. + * Unfortunately (for us), we need to precisely identify each unique + * error code, e.g. the action taken if EWB fails varies based on the + * type of fault and on the exact SGX error code, i.e. we can't simply + * convert all faults to -EFAULT. + * + * To make all three error types coexist, we set bit 30 to identify an + * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate + * between positive (faults and SGX error codes) and negative (system + * error codes) values. + */ +#define SGX_ENCLS_FAULT_FLAG 0x40000000 + +/** + * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV + * %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. + * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * been completed yet. + * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. + * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * public key does not match IA32_SGXLEPUBKEYHASH. + * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it + * is in the PENDING or MODIFIED state. + * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + */ +enum sgx_return_code { + SGX_EPC_PAGE_CONFLICT = 7, + SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, + SGX_INVALID_EINITTOKEN = 16, + SGX_PAGE_NOT_MODIFIABLE = 20, + SGX_UNMASKED_EVENT = 128, +}; + +/* The modulus size for 3072-bit RSA keys. */ +#define SGX_MODULUS_SIZE 384 + +/** + * enum sgx_miscselect - additional information to an SSA frame + * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * + * Save State Area (SSA) is a stack inside the enclave used to store processor + * state when an exception or interrupt occurs. This enum defines additional + * information stored to an SSA frame. + */ +enum sgx_miscselect { + SGX_MISC_EXINFO = BIT(0), +}; + +#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1) + +#define SGX_SSA_GPRS_SIZE 184 +#define SGX_SSA_MISC_EXINFO_SIZE 16 + +/** + * enum sgx_attributes - the attributes field in &struct sgx_secs + * %SGX_ATTR_INIT: Enclave can be entered (is initialized). + * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * %SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * attestation. + * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * sign cryptographic tokens that can be passed to + * EINIT as an authorization to run an enclave. + */ +enum sgx_attribute { + SGX_ATTR_INIT = BIT(0), + SGX_ATTR_DEBUG = BIT(1), + SGX_ATTR_MODE64BIT = BIT(2), + SGX_ATTR_PROVISIONKEY = BIT(4), + SGX_ATTR_EINITTOKENKEY = BIT(5), + SGX_ATTR_KSS = BIT(7), +}; + +#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | BIT_ULL(6) | GENMASK_ULL(63, 8)) + +/** + * struct sgx_secs - SGX Enclave Control Structure (SECS) + * @size: size of the address space + * @base: base address of the address space + * @ssa_frame_size: size of an SSA frame + * @miscselect: additional information stored to an SSA frame + * @attributes: attributes for enclave + * @xfrm: XSave-Feature Request Mask (subset of XCR0) + * @mrenclave: SHA256-hash of the enclave contents + * @mrsigner: SHA256-hash of the public key used to sign the SIGSTRUCT + * @config_id: a user-defined value that is used in key derivation + * @isv_prod_id: a user-defined value that is used in key derivation + * @isv_svn: a user-defined value that is used in key derivation + * @config_svn: a user-defined value that is used in key derivation + * + * SGX Enclave Control Structure (SECS) is a special enclave page that is not + * visible in the address space. In fact, this structure defines the address + * range and other global attributes for the enclave and it is the first EPC + * page created for any enclave. It is moved from a temporary buffer to an EPC + * by the means of ENCLS[ECREATE] function. + */ +struct sgx_secs { + u64 size; + u64 base; + u32 ssa_frame_size; + u32 miscselect; + u8 reserved1[24]; + u64 attributes; + u64 xfrm; + u32 mrenclave[8]; + u8 reserved2[32]; + u32 mrsigner[8]; + u8 reserved3[32]; + u32 config_id[16]; + u16 isv_prod_id; + u16 isv_svn; + u16 config_svn; + u8 reserved4[3834]; +} __packed; + +/** + * enum sgx_tcs_flags - execution flags for TCS + * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints + * inside an enclave. It is cleared by EADD but can + * be set later with EDBGWR. + */ +enum sgx_tcs_flags { + SGX_TCS_DBGOPTIN = 0x01, +}; + +#define SGX_TCS_RESERVED_MASK GENMASK_ULL(63, 1) +#define SGX_TCS_RESERVED_SIZE 4024 + +/** + * struct sgx_tcs - Thread Control Structure (TCS) + * @state: used to mark an entered TCS + * @flags: execution flags (cleared by EADD) + * @ssa_offset: SSA stack offset relative to the enclave base + * @ssa_index: the current SSA frame index (cleard by EADD) + * @nr_ssa_frames: the number of frame in the SSA stack + * @entry_offset: entry point offset relative to the enclave base + * @exit_addr: address outside the enclave to exit on an exception or + * interrupt + * @fs_offset: offset relative to the enclave base to become FS + * segment inside the enclave + * @gs_offset: offset relative to the enclave base to become GS + * segment inside the enclave + * @fs_limit: size to become a new FS-limit (only 32-bit enclaves) + * @gs_limit: size to become a new GS-limit (only 32-bit enclaves) + * + * Thread Control Structure (TCS) is an enclave page visible in its address + * space that defines an entry point inside the enclave. A thread enters inside + * an enclave by supplying address of TCS to ENCLU(EENTER). A TCS can be entered + * by only one thread at a time. + */ +struct sgx_tcs { + u64 state; + u64 flags; + u64 ssa_offset; + u32 ssa_index; + u32 nr_ssa_frames; + u64 entry_offset; + u64 exit_addr; + u64 fs_offset; + u64 gs_offset; + u32 fs_limit; + u32 gs_limit; + u8 reserved[SGX_TCS_RESERVED_SIZE]; +} __packed; + +/** + * struct sgx_pageinfo - an enclave page descriptor + * @addr: address of the enclave page + * @contents: pointer to the page contents + * @metadata: pointer either to a SECINFO or PCMD instance + * @secs: address of the SECS page + */ +struct sgx_pageinfo { + u64 addr; + u64 contents; + u64 metadata; + u64 secs; +} __packed __aligned(32); + + +/** + * enum sgx_page_type - bits in the SECINFO flags defining the page type + * %SGX_PAGE_TYPE_SECS: a SECS page + * %SGX_PAGE_TYPE_TCS: a TCS page + * %SGX_PAGE_TYPE_REG: a regular page + * %SGX_PAGE_TYPE_VA: a VA page + * %SGX_PAGE_TYPE_TRIM: a page in trimmed state + * + * Make sure when making changes to this enum that its values can still fit + * in the bitfield within &struct sgx_encl_page + */ +enum sgx_page_type { + SGX_PAGE_TYPE_SECS, + SGX_PAGE_TYPE_TCS, + SGX_PAGE_TYPE_REG, + SGX_PAGE_TYPE_VA, + SGX_PAGE_TYPE_TRIM, +}; + +#define SGX_NR_PAGE_TYPES 5 +#define SGX_PAGE_TYPE_MASK GENMASK(7, 0) + +/** + * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo + * %SGX_SECINFO_R: allow read + * %SGX_SECINFO_W: allow write + * %SGX_SECINFO_X: allow execution + * %SGX_SECINFO_SECS: a SECS page + * %SGX_SECINFO_TCS: a TCS page + * %SGX_SECINFO_REG: a regular page + * %SGX_SECINFO_VA: a VA page + * %SGX_SECINFO_TRIM: a page in trimmed state + */ +enum sgx_secinfo_flags { + SGX_SECINFO_R = BIT(0), + SGX_SECINFO_W = BIT(1), + SGX_SECINFO_X = BIT(2), + SGX_SECINFO_SECS = (SGX_PAGE_TYPE_SECS << 8), + SGX_SECINFO_TCS = (SGX_PAGE_TYPE_TCS << 8), + SGX_SECINFO_REG = (SGX_PAGE_TYPE_REG << 8), + SGX_SECINFO_VA = (SGX_PAGE_TYPE_VA << 8), + SGX_SECINFO_TRIM = (SGX_PAGE_TYPE_TRIM << 8), +}; + +#define SGX_SECINFO_PERMISSION_MASK GENMASK_ULL(2, 0) +#define SGX_SECINFO_PAGE_TYPE_MASK (SGX_PAGE_TYPE_MASK << 8) +#define SGX_SECINFO_RESERVED_MASK ~(SGX_SECINFO_PERMISSION_MASK | \ + SGX_SECINFO_PAGE_TYPE_MASK) + +/** + * struct sgx_secinfo - describes attributes of an EPC page + * @flags: permissions and type + * + * Used together with ENCLS leaves that add or modify an EPC page to an + * enclave to define page permissions and type. + */ +struct sgx_secinfo { + u64 flags; + u8 reserved[56]; +} __packed __aligned(64); + +#define SGX_PCMD_RESERVED_SIZE 40 + +/** + * struct sgx_pcmd - Paging Crypto Metadata (PCMD) + * @enclave_id: enclave identifier + * @mac: MAC over PCMD, page contents and isvsvn + * + * PCMD is stored for every swapped page to the regular memory. When ELDU loads + * the page back it recalculates the MAC by using a isvsvn number stored in a + * VA page. Together these two structures bring integrity and rollback + * protection. + */ +struct sgx_pcmd { + struct sgx_secinfo secinfo; + u64 enclave_id; + u8 reserved[SGX_PCMD_RESERVED_SIZE]; + u8 mac[16]; +} __packed __aligned(128); + +#define SGX_SIGSTRUCT_RESERVED1_SIZE 84 +#define SGX_SIGSTRUCT_RESERVED2_SIZE 20 +#define SGX_SIGSTRUCT_RESERVED3_SIZE 32 +#define SGX_SIGSTRUCT_RESERVED4_SIZE 12 + +/** + * struct sgx_sigstruct_header - defines author of the enclave + * @header1: constant byte string + * @vendor: must be either 0x0000 or 0x8086 + * @date: YYYYMMDD in BCD + * @header2: constant byte string + * @swdefined: software defined value + */ +struct sgx_sigstruct_header { + u64 header1[2]; + u32 vendor; + u32 date; + u64 header2[2]; + u32 swdefined; + u8 reserved1[84]; +} __packed; + +/** + * struct sgx_sigstruct_body - defines contents of the enclave + * @miscselect: additional information stored to an SSA frame + * @misc_mask: required miscselect in SECS + * @attributes: attributes for enclave + * @xfrm: XSave-Feature Request Mask (subset of XCR0) + * @attributes_mask: required attributes in SECS + * @xfrm_mask: required XFRM in SECS + * @mrenclave: SHA256-hash of the enclave contents + * @isvprodid: a user-defined value that is used in key derivation + * @isvsvn: a user-defined value that is used in key derivation + */ +struct sgx_sigstruct_body { + u32 miscselect; + u32 misc_mask; + u8 reserved2[20]; + u64 attributes; + u64 xfrm; + u64 attributes_mask; + u64 xfrm_mask; + u8 mrenclave[32]; + u8 reserved3[32]; + u16 isvprodid; + u16 isvsvn; +} __packed; + +/** + * struct sgx_sigstruct - an enclave signature + * @header: defines author of the enclave + * @modulus: the modulus of the public key + * @exponent: the exponent of the public key + * @signature: the signature calculated over the fields except modulus, + * @body: defines contents of the enclave + * @q1: a value used in RSA signature verification + * @q2: a value used in RSA signature verification + * + * Header and body are the parts that are actual signed. The remaining fields + * define the signature of the enclave. + */ +struct sgx_sigstruct { + struct sgx_sigstruct_header header; + u8 modulus[SGX_MODULUS_SIZE]; + u32 exponent; + u8 signature[SGX_MODULUS_SIZE]; + struct sgx_sigstruct_body body; + u8 reserved4[12]; + u8 q1[SGX_MODULUS_SIZE]; + u8 q2[SGX_MODULUS_SIZE]; +} __packed; + +#define SGX_LAUNCH_TOKEN_SIZE 304 + +/* + * Do not put any hardware-defined SGX structure representations below this + * comment! + */ + +#ifdef CONFIG_X86_SGX_KVM +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, + int *trapnr); +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr); +#endif + +int sgx_set_attribute(unsigned long *allowed_attributes, + unsigned int attribute_fd); + +#endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h new file mode 100644 index 000000000..c0ef921c0 --- /dev/null +++ b/arch/x86/include/asm/shared/io.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_IO_H +#define _ASM_X86_SHARED_IO_H + +#include + +#define BUILDIO(bwl, bw, type) \ +static inline void __out##bwl(type value, u16 port) \ +{ \ + asm volatile("out" #bwl " %" #bw "0, %w1" \ + : : "a"(value), "Nd"(port)); \ +} \ + \ +static inline type __in##bwl(u16 port) \ +{ \ + type value; \ + asm volatile("in" #bwl " %w1, %" #bw "0" \ + : "=a"(value) : "Nd"(port)); \ + return value; \ +} + +BUILDIO(b, b, u8) +BUILDIO(w, w, u16) +BUILDIO(l, , u32) +#undef BUILDIO + +#define inb __inb +#define inw __inw +#define inl __inl +#define outb __outb +#define outw __outw +#define outl __outl + +#endif diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h new file mode 100644 index 000000000..1e6ec10b3 --- /dev/null +++ b/arch/x86/include/asm/shared/msr.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_MSR_H +#define _ASM_X86_SHARED_MSR_H + +struct msr { + union { + struct { + u32 l; + u32 h; + }; + u64 q; + }; +}; + +#endif /* _ASM_X86_SHARED_MSR_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h new file mode 100644 index 000000000..e53f26228 --- /dev/null +++ b/arch/x86/include/asm/shared/tdx.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_TDX_H +#define _ASM_X86_SHARED_TDX_H + +#include +#include + +#define TDX_HYPERCALL_STANDARD 0 + +#define TDX_HCALL_HAS_OUTPUT BIT(0) +#define TDX_HCALL_ISSUE_STI BIT(1) + +#define TDX_CPUID_LEAF_ID 0x21 +#define TDX_IDENT "IntelTDX " + +#ifndef __ASSEMBLY__ + +/* + * Used in __tdx_hypercall() to pass down and get back registers' values of + * the TDCALL instruction when requesting services from the VMM. + * + * This is a software only structure and not part of the TDX module/VMM ABI. + */ +struct tdx_hypercall_args { + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; +}; + +/* Used to request services from the VMM */ +u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags); + +/* Called from __tdx_hypercall() for unrecoverable failure */ +void __tdx_hypercall_failed(void); + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/shmparam.h b/arch/x86/include/asm/shmparam.h new file mode 100644 index 000000000..c4041819c --- /dev/null +++ b/arch/x86/include/asm/shmparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHMPARAM_H +#define _ASM_X86_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_X86_SHMPARAM_H */ diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h new file mode 100644 index 000000000..140d890c2 --- /dev/null +++ b/arch/x86/include/asm/sigcontext.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SIGCONTEXT_H +#define _ASM_X86_SIGCONTEXT_H + +/* This is a legacy header - all kernel code includes directly. */ + +#include + +#endif /* _ASM_X86_SIGCONTEXT_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h new file mode 100644 index 000000000..84eab2724 --- /dev/null +++ b/arch/x86/include/asm/sigframe.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SIGFRAME_H +#define _ASM_X86_SIGFRAME_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +#define sigframe_ia32 sigframe +#define rt_sigframe_ia32 rt_sigframe +#define ucontext_ia32 ucontext +#else /* !CONFIG_X86_32 */ + +#ifdef CONFIG_IA32_EMULATION +#include +#endif /* CONFIG_IA32_EMULATION */ + +#endif /* CONFIG_X86_32 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +struct sigframe_ia32 { + u32 pretcode; + int sig; + struct sigcontext_32 sc; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate_32 fpstate_unused; + unsigned int extramask[1]; + char retcode[8]; + /* fp state follows here */ +}; + +struct rt_sigframe_ia32 { + u32 pretcode; + int sig; + u32 pinfo; + u32 puc; +#ifdef CONFIG_IA32_EMULATION + compat_siginfo_t info; +#else /* !CONFIG_IA32_EMULATION */ + struct siginfo info; +#endif /* CONFIG_IA32_EMULATION */ + struct ucontext_ia32 uc; + char retcode[8]; + /* fp state follows here */ +}; +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ + +#ifdef CONFIG_X86_64 + +struct rt_sigframe { + char __user *pretcode; + struct ucontext uc; + struct siginfo info; + /* fp state follows here */ +}; + +#ifdef CONFIG_X86_X32_ABI + +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + compat_stack_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +struct rt_sigframe_x32 { + u64 pretcode; + struct ucontext_x32 uc; + compat_siginfo_t info; + /* fp state follows here */ +}; + +#endif /* CONFIG_X86_X32_ABI */ + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h new file mode 100644 index 000000000..65e667279 --- /dev/null +++ b/arch/x86/include/asm/sighandling.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SIGHANDLING_H +#define _ASM_X86_SIGHANDLING_H + +#include +#include +#include + +#include + +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF | X86_EFLAGS_RF) + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +#endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h new file mode 100644 index 000000000..2dfb5fea1 --- /dev/null +++ b/arch/x86/include/asm/signal.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SIGNAL_H +#define _ASM_X86_SIGNAL_H + +#ifndef __ASSEMBLY__ +#include + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 + +#ifdef __i386__ +# define _NSIG_BPW 32 +#else +# define _NSIG_BPW 64 +#endif + +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */ +#define SA_IA32_ABI 0x02000000u +#define SA_X32_ABI 0x01000000u + +#ifndef CONFIG_COMPAT +#define compat_sigset_t compat_sigset_t +typedef sigset_t compat_sigset_t; +#endif + +#endif /* __ASSEMBLY__ */ +#include +#ifndef __ASSEMBLY__ + +#define __ARCH_HAS_SA_RESTORER + +#include +#include + +#ifdef __i386__ + +#define __HAVE_ARCH_SIG_BITOPS + +#define sigaddset(set,sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigaddset((set), (sig)) \ + : __gen_sigaddset((set), (sig))) + +static inline void __gen_sigaddset(sigset_t *set, int _sig) +{ + asm("btsl %1,%0" : "+m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static inline void __const_sigaddset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + set->sig[sig / _NSIG_BPW] |= 1 << (sig % _NSIG_BPW); +} + +#define sigdelset(set, sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigdelset((set), (sig)) \ + : __gen_sigdelset((set), (sig))) + + +static inline void __gen_sigdelset(sigset_t *set, int _sig) +{ + asm("btrl %1,%0" : "+m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static inline void __const_sigdelset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + set->sig[sig / _NSIG_BPW] &= ~(1 << (sig % _NSIG_BPW)); +} + +static inline int __const_sigismember(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); +} + +static inline int __gen_sigismember(sigset_t *set, int _sig) +{ + bool ret; + asm("btl %2,%1" CC_SET(c) + : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); + return ret; +} + +#define sigismember(set, sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigismember((set), (sig)) \ + : __gen_sigismember((set), (sig))) + +struct pt_regs; + +#else /* __i386__ */ + +#undef __HAVE_ARCH_SIG_BITOPS + +#endif /* !__i386__ */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h new file mode 100644 index 000000000..a341c878e --- /dev/null +++ b/arch/x86/include/asm/simd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +/* + * may_use_simd - whether it is allowable at this time to issue SIMD + * instructions or access the SIMD register file + */ +static __must_check inline bool may_use_simd(void) +{ + return irq_fpu_usable(); +} diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h new file mode 100644 index 000000000..bab490379 --- /dev/null +++ b/arch/x86/include/asm/smap.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Supervisor Mode Access Prevention support + * + * Copyright (C) 2012 Intel Corporation + * Author: H. Peter Anvin + */ + +#ifndef _ASM_X86_SMAP_H +#define _ASM_X86_SMAP_H + +#include +#include +#include + +/* "Raw" instruction opcodes */ +#define __ASM_CLAC ".byte 0x0f,0x01,0xca" +#define __ASM_STAC ".byte 0x0f,0x01,0xcb" + +#ifdef __ASSEMBLY__ + +#define ASM_CLAC \ + ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP + +#define ASM_STAC \ + ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP + +#else /* __ASSEMBLY__ */ + +static __always_inline void clac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative("", __ASM_CLAC, X86_FEATURE_SMAP); +} + +static __always_inline void stac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative("", __ASM_STAC, X86_FEATURE_SMAP); +} + +static __always_inline unsigned long smap_save(void) +{ + unsigned long flags; + + asm volatile ("# smap_save\n\t" + ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t", + X86_FEATURE_SMAP) + : "=rm" (flags) : : "memory", "cc"); + + return flags; +} + +static __always_inline void smap_restore(unsigned long flags) +{ + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("", "push %0; popf\n\t", + X86_FEATURE_SMAP) + : : "g" (flags) : "memory", "cc"); +} + +/* These macros can be used in asm() statements */ +#define ASM_CLAC \ + ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP) +#define ASM_STAC \ + ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h new file mode 100644 index 000000000..b3b34032e --- /dev/null +++ b/arch/x86/include/asm/smp.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SMP_H +#define _ASM_X86_SMP_H +#ifndef __ASSEMBLY__ +#include +#include + +#include +#include + +extern int smp_num_siblings; +extern unsigned int num_processors; + +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); +/* cpus sharing the last level cache: */ +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); +DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); +DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); + +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) +DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); +#endif + +struct task_struct; + +struct smp_ops { + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*stop_other_cpus)(int wait); + void (*crash_stop_other_cpus)(void); + void (*smp_send_reschedule)(int cpu); + + int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int cpu); + void (*play_dead)(void); + + void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_single_ipi)(int cpu); +}; + +/* Globals due to paravirt */ +extern void set_cpu_sibling_map(int cpu); + +#ifdef CONFIG_SMP +extern struct smp_ops smp_ops; + +static inline void smp_send_stop(void) +{ + smp_ops.stop_other_cpus(0); +} + +static inline void stop_other_cpus(void) +{ + smp_ops.stop_other_cpus(1); +} + +static inline void smp_prepare_boot_cpu(void) +{ + smp_ops.smp_prepare_boot_cpu(); +} + +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} + +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + return smp_ops.cpu_up(cpu, tidle); +} + +static inline int __cpu_disable(void) +{ + return smp_ops.cpu_disable(); +} + +static inline void __cpu_die(unsigned int cpu) +{ + smp_ops.cpu_die(cpu); +} + +static inline void play_dead(void) +{ + smp_ops.play_dead(); +} + +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} + +static inline void arch_send_call_function_single_ipi(int cpu) +{ + smp_ops.send_call_func_single_ipi(cpu); +} + +static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + smp_ops.send_call_func_ipi(mask); +} + +void cpu_disable_common(void); +void native_smp_prepare_boot_cpu(void); +void smp_prepare_cpus_common(void); +void native_smp_prepare_cpus(unsigned int max_cpus); +void calculate_max_logical_packages(void); +void native_smp_cpus_done(unsigned int max_cpus); +int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_cpu_disable(void); +int common_cpu_die(unsigned int cpu); +void native_cpu_die(unsigned int cpu); +void hlt_play_dead(void); +void native_play_dead(void); +void play_dead_common(void); +void wbinvd_on_cpu(int cpu); +int wbinvd_on_all_cpus(void); +void cond_wakeup_cpu0(void); + +void smp_kick_mwait_play_dead(void); + +void native_smp_send_reschedule(int cpu); +void native_send_call_func_ipi(const struct cpumask *mask); +void native_send_call_func_single_ipi(int cpu); +void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); + +void smp_store_boot_cpu_info(void); +void smp_store_cpu_info(int id); + +asmlinkage __visible void smp_reboot_interrupt(void); +__visible void smp_reschedule_interrupt(struct pt_regs *regs); +__visible void smp_call_function_interrupt(struct pt_regs *regs); +__visible void smp_call_function_single_interrupt(struct pt_regs *r); + +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) + +#ifdef CONFIG_X86_32 +extern int safe_smp_processor_id(void); +#else +# define safe_smp_processor_id() smp_processor_id() +#endif + +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return per_cpu(cpu_llc_shared_map, cpu); +} + +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} + +#else /* !CONFIG_SMP */ +#define wbinvd_on_cpu(cpu) wbinvd() +static inline int wbinvd_on_all_cpus(void) +{ + wbinvd(); + return 0; +} + +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return (struct cpumask *)cpumask_of(0); +} +#endif /* CONFIG_SMP */ + +extern unsigned disabled_cpus; + +#ifdef CONFIG_X86_LOCAL_APIC +extern int hard_smp_processor_id(void); + +#else /* CONFIG_X86_LOCAL_APIC */ +#define hard_smp_processor_id() 0 +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_DEBUG_NMI_SELFTEST +extern void nmi_selftest(void); +#else +#define nmi_selftest() do { } while (0) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h new file mode 100644 index 000000000..889d53d6a --- /dev/null +++ b/arch/x86/include/asm/softirq_stack.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SOFTIRQ_STACK_H +#define _ASM_X86_SOFTIRQ_STACK_H + +#ifdef CONFIG_X86_64 +# include +#else +# include +#endif + +#endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h new file mode 100644 index 000000000..1be13b2df --- /dev/null +++ b/arch/x86/include/asm/sparsemem.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPARSEMEM_H +#define _ASM_X86_SPARSEMEM_H + +#include + +#ifdef CONFIG_SPARSEMEM +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the flags + * field of the struct page + * + * SECTION_SIZE_BITS 2^n: size of each section + * MAX_PHYSMEM_BITS 2^n: max size of physical address space + * + */ + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE +# define SECTION_SIZE_BITS 29 +# define MAX_PHYSMEM_BITS 36 +# else +# define SECTION_SIZE_BITS 26 +# define MAX_PHYSMEM_BITS 32 +# endif +#else /* CONFIG_X86_32 */ +# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46) +#endif + +#endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +extern int numa_fill_memblks(u64 start, u64 end); +#define numa_fill_memblks numa_fill_memblks +#endif +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000..cb0386fc4 --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include +#include + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. + */ +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); +} + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_ssbd_mask; + +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +} + +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + +extern void speculation_ctrl_update(unsigned long tif); +extern void speculation_ctrl_update_current(void); + +#endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h new file mode 100644 index 000000000..c2e322189 --- /dev/null +++ b/arch/x86/include/asm/special_insns.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECIAL_INSNS_H +#define _ASM_X86_SPECIAL_INSNS_H + + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +/* + * The compiler should not reorder volatile asm statements with respect to each + * other: they should execute in program order. However GCC 4.9.x and 5.x have + * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder + * volatile asm. The write functions are not affected since they have memory + * clobbers preventing reordering. To prevent reads from being reordered with + * respect to writes, use a dummy memory operand. + */ + +#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL) + +void native_write_cr0(unsigned long val); + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER); + return val; +} + +static __always_inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER); + return val; +} + +static __always_inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); +} + +static inline unsigned long __native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; +#ifdef CONFIG_X86_32 + /* + * This could fault if CR4 does not exist. Non-existent CR4 + * is functionally equivalent to CR4 == 0. Keep it simple and pretend + * that CR4 == 0 on CPUs that don't have CR4. + */ + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val) : "0" (0), __FORCE_ORDER); +#else + /* CR4 always exists on x86_64. */ + asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER); +#endif + return val; +} + +void native_write_cr4(unsigned long val); + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +static inline u32 rdpkru(void) +{ + u32 ecx = 0; + u32 edx, pkru; + + /* + * "rdpkru" instruction. Places PKRU contents in to EAX, + * clears EDX and requires that ecx=0. + */ + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (pkru), "=d" (edx) + : "c" (ecx)); + return pkru; +} + +static inline void wrpkru(u32 pkru) +{ + u32 ecx = 0, edx = 0; + + /* + * "wrpkru" instruction. Loads contents in EAX to PKRU, + * requires that ecx = edx = 0. + */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(ecx), "d"(edx)); +} + +#else +static inline u32 rdpkru(void) +{ + return 0; +} + +static inline void wrpkru(u32 pkru) +{ +} +#endif + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +extern asmlinkage void asm_load_gs_index(unsigned int selector); + +static inline void native_load_gs_index(unsigned int selector) +{ + unsigned long flags; + + local_irq_save(flags); + asm_load_gs_index(selector); + local_irq_restore(flags); +} + +static inline unsigned long __read_cr4(void) +{ + return native_read_cr4(); +} + +#ifdef CONFIG_PARAVIRT_XXL +#include +#else + +static inline unsigned long read_cr0(void) +{ + return native_read_cr0(); +} + +static inline void write_cr0(unsigned long x) +{ + native_write_cr0(x); +} + +static __always_inline unsigned long read_cr2(void) +{ + return native_read_cr2(); +} + +static __always_inline void write_cr2(unsigned long x) +{ + native_write_cr2(x); +} + +/* + * Careful! CR3 contains more than just an address. You probably want + * read_cr3_pa() instead. + */ +static inline unsigned long __read_cr3(void) +{ + return __native_read_cr3(); +} + +static inline void write_cr3(unsigned long x) +{ + native_write_cr3(x); +} + +static inline void __write_cr4(unsigned long x) +{ + native_write_cr4(x); +} + +static inline void wbinvd(void) +{ + native_wbinvd(); +} + + +static inline void load_gs_index(unsigned int selector) +{ +#ifdef CONFIG_X86_64 + native_load_gs_index(selector); +#else + loadsegment(gs, selector); +#endif +} + +#endif /* CONFIG_PARAVIRT_XXL */ + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); +} + +static inline void clflushopt(volatile void *__p) +{ + alternative_io(".byte 0x3e; clflush %P0", + ".byte 0x66; clflush %P0", + X86_FEATURE_CLFLUSHOPT, + "+m" (*(volatile char __force *)__p)); +} + +static inline void clwb(volatile void *__p) +{ + volatile struct { char x[64]; } *p = __p; + + asm volatile(ALTERNATIVE_2( + ".byte 0x3e; clflush (%[pax])", + ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ + X86_FEATURE_CLFLUSHOPT, + ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ + X86_FEATURE_CLWB) + : [p] "+m" (*p) + : [pax] "a" (p)); +} + +#define nop() asm volatile ("nop") + +static inline void serialize(void) +{ + /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ + asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); +} + +/* The dst parameter must be 64-bytes aligned */ +static inline void movdir64b(void __iomem *dst, const void *src) +{ + const struct { char _[64]; } *__src = src; + struct { char _[64]; } __iomem *__dst = dst; + + /* + * MOVDIR64B %(rdx), rax. + * + * Both __src and __dst must be memory constraints in order to tell the + * compiler that no other memory accesses should be reordered around + * this one. + * + * Also, both must be supplied as lvalues because this tells + * the compiler what the object is (its size) the instruction accesses. + * I.e., not the pointers but what they point to, thus the deref'ing '*'. + */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); +} + +/** + * enqcmds - Enqueue a command in supervisor (CPL0) mode + * @dst: destination, in MMIO space (must be 512-bit aligned) + * @src: 512 bits memory operand + * + * The ENQCMDS instruction allows software to write a 512-bit command to + * a 512-bit-aligned special MMIO region that supports the instruction. + * A return status is loaded into the ZF flag in the RFLAGS register. + * ZF = 0 equates to success, and ZF = 1 indicates retry or error. + * + * This function issues the ENQCMDS instruction to submit data from + * kernel space to MMIO space, in a unit of 512 bits. Order of data access + * is not guaranteed, nor is a memory barrier performed afterwards. It + * returns 0 on success and -EAGAIN on failure. + * + * Warning: Do not use this helper unless your driver has checked that the + * ENQCMDS instruction is supported on the platform and the device accepts + * ENQCMDS. + */ +static inline int enqcmds(void __iomem *dst, const void *src) +{ + const struct { char _[64]; } *__src = src; + struct { char _[64]; } __iomem *__dst = dst; + bool zf; + + /* + * ENQCMDS %(rdx), rax + * + * See movdir64b()'s comment on operand specification. + */ + asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" + CC_SET(z) + : CC_OUT(z) (zf), "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); + + /* Submission failure is indicated via EFLAGS.ZF=1 */ + if (zf) + return -EAGAIN; + + return 0; +} + +static __always_inline void tile_release(void) +{ + /* + * Instruction opcode for TILERELEASE; supported in binutils + * version >= 2.36. + */ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0"); +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_SPECIAL_INSNS_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h new file mode 100644 index 000000000..5b6bc7016 --- /dev/null +++ b/arch/x86/include/asm/spinlock.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPINLOCK_H +#define _ASM_X86_SPINLOCK_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + * + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * These are fair FIFO ticket locks, which support up to 2^16 CPUs. + * + * (the type definitions are in asm/spinlock_types.h) + */ + +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) + +#include + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + * + * On x86, we implement read-write locks using the generic qrwlock with + * x86 specific optimization. + */ + +#include + +#endif /* _ASM_X86_SPINLOCK_H */ diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h new file mode 100644 index 000000000..323db6c58 --- /dev/null +++ b/arch/x86/include/asm/spinlock_types.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPINLOCK_TYPES_H +#define _ASM_X86_SPINLOCK_TYPES_H + +#include +#include +#include + +#endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h new file mode 100644 index 000000000..e0975e9c4 --- /dev/null +++ b/arch/x86/include/asm/sta2x11.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Header file for STMicroelectronics ConneXt (STA2X11) IOHub + */ +#ifndef __ASM_STA2X11_H +#define __ASM_STA2X11_H + +#include + +/* This needs to be called from the MFD to configure its sub-devices */ +struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev); + +#endif /* __ASM_STA2X11_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h new file mode 100644 index 000000000..24a8d6c4f --- /dev/null +++ b/arch/x86/include/asm/stackprotector.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and unfortunately gcc historically required it to be at a fixed offset + * from the percpu segment base. On x86_64, the offset is 40 bytes. + * + * The same segment is shared by percpu area and stack canary. On + * x86_64, percpu symbols are zero based and %gs (64-bit) points to the + * base of percpu area. The first occupant of the percpu area is always + * fixed_percpu_data which contains stack_canary at the appropriate + * offset. On x86_32, the stack canary is just a regular percpu + * variable. + * + * Putting percpu data in %fs on 32-bit is a minor optimization compared to + * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely + * to load 0 into %fs on exit to usermode, whereas with percpu data in + * %gs, we are likely to load a non-null %gs on return to user mode. + * + * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector + * support, we can remove some of this complexity. + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#ifdef CONFIG_STACKPROTECTOR + +#include +#include +#include +#include + +#include +#include + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return + * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. + */ +static __always_inline void boot_init_stack_canary(void) +{ + u64 canary; + u64 tsc; + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); +#endif + /* + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. + */ + get_random_bytes(&canary, sizeof(canary)); + tsc = rdtsc(); + canary += tsc + (tsc << 32UL); + canary &= CANARY_MASK; + + current->stack_canary = canary; +#ifdef CONFIG_X86_64 + this_cpu_write(fixed_percpu_data.stack_canary, canary); +#else + this_cpu_write(__stack_chk_guard, canary); +#endif +} + +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) +{ +#ifdef CONFIG_X86_64 + per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary; +#else + per_cpu(__stack_chk_guard, cpu) = idle->stack_canary; +#endif +} + +#else /* STACKPROTECTOR */ + +/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ + +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) +{ } + +#endif /* STACKPROTECTOR */ +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h new file mode 100644 index 000000000..3881b5333 --- /dev/null +++ b/arch/x86/include/asm/stacktrace.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + +#ifndef _ASM_X86_STACKTRACE_H +#define _ASM_X86_STACKTRACE_H + +#include +#include + +#include +#include + +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, + STACK_TYPE_IRQ, + STACK_TYPE_SOFTIRQ, + STACK_TYPE_ENTRY, + STACK_TYPE_EXCEPTION, + STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, +}; + +struct stack_info { + enum stack_type type; + unsigned long *begin, *end, *next_sp; +}; + +bool in_task_stack(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + +bool in_entry_stack(unsigned long *stack, struct stack_info *info); + +int get_stack_info(unsigned long *stack, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask); +bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + +static __always_inline +bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) +{ + /* make sure it's not in the stack proper */ + if (get_stack_info_noinstr(stack, current, info)) + return false; + /* but if it is in the page below it, we hit a guard */ + return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); +} + +const char *stack_type_name(enum stack_type type); + +static inline bool on_stack(struct stack_info *info, void *addr, size_t len) +{ + void *begin = info->begin; + void *end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && + addr + len > begin && addr + len <= end); +} + +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#else +#define STACKSLOTS_PER_LINE 4 +#endif + +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long * +get_frame_pointer(struct task_struct *task, struct pt_regs *regs) +{ + if (regs) + return (unsigned long *)regs->bp; + + if (task == current) + return __builtin_frame_address(0); + + return &((struct inactive_task_frame *)task->thread.sp)->bp; +} +#else +static inline unsigned long * +get_frame_pointer(struct task_struct *task, struct pt_regs *regs) +{ + return NULL; +} +#endif /* CONFIG_FRAME_POINTER */ + +static inline unsigned long * +get_stack_pointer(struct task_struct *task, struct pt_regs *regs) +{ + if (regs) + return (unsigned long *)regs->sp; + + if (task == current) + return __builtin_frame_address(0); + + return (unsigned long *)task->thread.sp; +} + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stack_frame_ia32 { + u32 next_frame; + u32 return_address; +}; + +void show_opcodes(struct pt_regs *regs, const char *loglvl); +void show_ip(struct pt_regs *regs, const char *loglvl); +#endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h new file mode 100644 index 000000000..343b722cc --- /dev/null +++ b/arch/x86/include/asm/static_call.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_STATIC_CALL_H +#define _ASM_STATIC_CALL_H + +#include + +/* + * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which + * uses the current value of the key->func pointer to do an indirect jump to + * the function. This trampoline is only used during boot, before the call + * sites get patched by static_call_update(). The name of this trampoline has + * a magical aspect: objtool uses it to find static call sites so it can create + * the .static_call_sites section. + * + * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which + * does a direct jump to the function. The direct jump gets patched by + * static_call_update(). + * + * Having the trampoline in a special section forces GCC to emit a JMP.d32 when + * it does tail-call optimization on the call; since you cannot compute the + * relative displacement across sections. + */ + +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ +#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ + asm(".pushsection .static_call.text, \"ax\" \n" \ + ".align 4 \n" \ + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + ANNOTATE_NOENDBR \ + insns " \n" \ + ".byte 0x0f, 0xb9, 0xcc \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") + +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + +#ifdef CONFIG_RETHUNK +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif + +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) + +#define ARCH_ADD_TRAMP_KEY(name) \ + asm(".pushsection .static_call_tramp_key, \"a\" \n" \ + ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \ + ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ + ".popsection \n") + +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + +#endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h new file mode 100644 index 000000000..c3c2c1914 --- /dev/null +++ b/arch/x86/include/asm/string.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h new file mode 100644 index 000000000..32c0d981a --- /dev/null +++ b/arch/x86/include/asm/string_32.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_STRING_32_H +#define _ASM_X86_STRING_32_H + +#ifdef __KERNEL__ + +/* Let gcc decide whether to inline or use the out of line functions */ + +#define __HAVE_ARCH_STRCPY +extern char *strcpy(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCPY +extern char *strncpy(char *dest, const char *src, size_t count); + +#define __HAVE_ARCH_STRCAT +extern char *strcat(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCAT +extern char *strncat(char *dest, const char *src, size_t count); + +#define __HAVE_ARCH_STRCMP +extern int strcmp(const char *cs, const char *ct); + +#define __HAVE_ARCH_STRNCMP +extern int strncmp(const char *cs, const char *ct, size_t count); + +#define __HAVE_ARCH_STRCHR +extern char *strchr(const char *s, int c); + +#define __HAVE_ARCH_STRLEN +extern size_t strlen(const char *s); + +static __always_inline void *__memcpy(void *to, const void *from, size_t n) +{ + int d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "andl $3,%%ecx\n\t" + "jz 1f\n\t" + "rep ; movsb\n\t" + "1:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; +} + +/* + * This looks ugly, but the compiler can optimize it totally, + * as the count is constant. + */ +static __always_inline void *__constant_memcpy(void *to, const void *from, + size_t n) +{ + long esi, edi; + if (!n) + return to; + + switch (n) { + case 1: + *(char *)to = *(char *)from; + return to; + case 2: + *(short *)to = *(short *)from; + return to; + case 4: + *(int *)to = *(int *)from; + return to; + case 3: + *(short *)to = *(short *)from; + *((char *)to + 2) = *((char *)from + 2); + return to; + case 5: + *(int *)to = *(int *)from; + *((char *)to + 4) = *((char *)from + 4); + return to; + case 6: + *(int *)to = *(int *)from; + *((short *)to + 2) = *((short *)from + 2); + return to; + case 8: + *(int *)to = *(int *)from; + *((int *)to + 1) = *((int *)from + 1); + return to; + } + + esi = (long)from; + edi = (long)to; + if (n >= 5 * 4) { + /* large block: use rep prefix */ + int ecx; + asm volatile("rep ; movsl" + : "=&c" (ecx), "=&D" (edi), "=&S" (esi) + : "0" (n / 4), "1" (edi), "2" (esi) + : "memory" + ); + } else { + /* small block: don't clobber ecx + smaller code */ + if (n >= 4 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 3 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 2 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 1 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + } + switch (n % 4) { + /* tail */ + case 0: + return to; + case 1: + asm volatile("movsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + case 2: + asm volatile("movsw" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + default: + asm volatile("movsw\n\tmovsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + } +} + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, size_t); + +#ifndef CONFIG_FORTIFY_SOURCE + +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) + +#endif /* !CONFIG_FORTIFY_SOURCE */ + +#define __HAVE_ARCH_MEMMOVE +void *memmove(void *dest, const void *src, size_t n); + +extern int memcmp(const void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE +#define memcmp __builtin_memcmp +#endif + +#define __HAVE_ARCH_MEMCHR +extern void *memchr(const void *cs, int c, size_t count); + +static inline void *__memset_generic(void *s, char c, size_t count) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosb" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "1" (s), "0" (count) + : "memory"); + return s; +} + +/* we might want to write optimized versions of these later */ +#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) + +/* Added by Gertjan van Wingerde to make minix and sysv module work */ +#define __HAVE_ARCH_STRNLEN +extern size_t strnlen(const char *s, size_t count); +/* end of additional stuff */ + +#define __HAVE_ARCH_STRSTR +extern char *strstr(const char *cs, const char *ct); + +#define __memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_count_memset((s), (c), (count)) \ + : __memset_generic((s), (c), (count))) + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, size_t); +#ifndef CONFIG_FORTIFY_SOURCE +#define memset(s, c, count) __builtin_memset(s, c, count) +#endif /* !CONFIG_FORTIFY_SOURCE */ + +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +/* + * find the first occurrence of byte 'c', or 1 past the area if none + */ +#define __HAVE_ARCH_MEMSCAN +extern void *memscan(void *addr, int c, size_t size); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_STRING_32_H */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h new file mode 100644 index 000000000..888731ccf --- /dev/null +++ b/arch/x86/include/asm/string_64.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_STRING_64_H +#define _ASM_X86_STRING_64_H + +#ifdef __KERNEL__ +#include + +/* Written 2002 by Andi Kleen */ + +/* Even with __builtin_ the compiler may decide to use the out of line + function. */ + +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +#include +#endif + +#define __HAVE_ARCH_MEMCPY 1 +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +#undef memcpy +#define memcpy __msan_memcpy +#else +extern void *memcpy(void *to, const void *from, size_t len); +#endif +extern void *__memcpy(void *to, const void *from, size_t len); + +#define __HAVE_ARCH_MEMSET +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +extern void *__msan_memset(void *s, int c, size_t n); +#undef memset +#define memset __msan_memset +#else +void *memset(void *s, int c, size_t n); +#endif +void *__memset(void *s, int c, size_t n); + +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET64 +static inline void *memset64(uint64_t *s, uint64_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosq" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMMOVE +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +#undef memmove +void *__msan_memmove(void *dest, const void *src, size_t len); +#define memmove __msan_memmove +#else +void *memmove(void *dest, const void *src, size_t count); +#endif +void *__memmove(void *dest, const void *src, size_t count); + +int memcmp(const void *cs, const void *ct, size_t count); +size_t strlen(const char *s); +char *strcpy(char *dest, const char *src); +char *strcat(char *dest, const char *src); +int strcmp(const char *cs, const char *ct); + +#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) +/* + * For files that not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#undef memcpy +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#undef memmove +#define memmove(dst, src, len) __memmove(dst, src, len) +#undef memset +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 +void __memcpy_flushcache(void *dst, const void *src, size_t cnt); +static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + if (__builtin_constant_p(cnt)) { + switch (cnt) { + case 4: + asm ("movntil %1, %0" : "=m"(*(u32 *)dst) : "r"(*(u32 *)src)); + return; + case 8: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + return; + case 16: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + asm ("movntiq %1, %0" : "=m"(*(u64 *)(dst + 8)) : "r"(*(u64 *)(src + 8))); + return; + } + } + __memcpy_flushcache(dst, src, cnt); +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h new file mode 100644 index 000000000..a892494ca --- /dev/null +++ b/arch/x86/include/asm/suspend.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif +extern unsigned long restore_jump_address __visible; +extern unsigned long jump_address_phys; +extern unsigned long restore_cr3 __visible; +extern unsigned long temp_pgt __visible; +extern unsigned long relocated_restore_code __visible; +extern int relocate_restore_code(void); +/* Defined in hibernate_asm_32/64.S */ +extern asmlinkage __visible int restore_image(void); diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h new file mode 100644 index 000000000..a800abb1a --- /dev/null +++ b/arch/x86/include/asm/suspend_32.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2001-2002 Pavel Machek + * Based on code + * Copyright 2001 Patrick Mochel + */ +#ifndef _ASM_X86_SUSPEND_32_H +#define _ASM_X86_SUSPEND_32_H + +#include +#include + +/* image of the saved processor state */ +struct saved_context { + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; + unsigned long cr0, cr2, cr3, cr4; + u64 misc_enable; + struct saved_msrs saved_msrs; + struct desc_ptr gdt_desc; + struct desc_ptr idt; + u16 ldt; + u16 tss; + unsigned long tr; + unsigned long safety; + unsigned long return_address; + bool misc_enable_saved; +} __attribute__((packed)); + +/* routines for saving/restoring kernel state */ +extern char core_restore_code[]; +extern char restore_registers[]; + +#endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h new file mode 100644 index 000000000..54df06687 --- /dev/null +++ b/arch/x86/include/asm/suspend_64.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2001-2003 Pavel Machek + * Based on code + * Copyright 2001 Patrick Mochel + */ +#ifndef _ASM_X86_SUSPEND_64_H +#define _ASM_X86_SUSPEND_64_H + +#include +#include + +/* + * Image of the saved processor state, used by the low level ACPI suspend to + * RAM code and by the low level hibernation code. + * + * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S + * and make sure that __save/__restore_processor_state(), defined in + * arch/x86/power/cpu.c, still work as required. + * + * Because the structure is packed, make sure to avoid unaligned members. For + * optimisation purposes but also because tools like kmemleak only search for + * pointers that are aligned. + */ +struct saved_context { + struct pt_regs regs; + + /* + * User CS and SS are saved in current_pt_regs(). The rest of the + * segment selectors need to be saved and restored here. + */ + u16 ds, es, fs, gs; + + /* + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, + * so we save them separately. We save the kernelmode GSBASE to + * restore percpu access after resume. + */ + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; + + unsigned long cr0, cr2, cr3, cr4; + u64 misc_enable; + struct saved_msrs saved_msrs; + unsigned long efer; + u16 gdt_pad; /* Unused */ + struct desc_ptr gdt_desc; + u16 idt_pad; + struct desc_ptr idt; + u16 ldt; + u16 tss; + unsigned long tr; + unsigned long safety; + unsigned long return_address; + bool misc_enable_saved; +} __attribute__((packed)); + +#define loaddebug(thread,register) \ + set_debugreg((thread)->debugreg##register, register) + +/* routines for saving/restoring kernel state */ +extern char core_restore_code[]; +extern char restore_registers[]; + +#endif /* _ASM_X86_SUSPEND_64_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h new file mode 100644 index 000000000..184fd776c --- /dev/null +++ b/arch/x86/include/asm/svm.h @@ -0,0 +1,637 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SVM_H +#define __SVM_H + +#include +#include + +#include + +/* + * 32-bit intercept words in the VMCB Control Area, starting + * at Byte offset 000h. + */ + +enum intercept_words { + INTERCEPT_CR = 0, + INTERCEPT_DR, + INTERCEPT_EXCEPTION, + INTERCEPT_WORD3, + INTERCEPT_WORD4, + INTERCEPT_WORD5, + MAX_INTERCEPT, +}; + +enum { + /* Byte offset 000h (word 0) */ + INTERCEPT_CR0_READ = 0, + INTERCEPT_CR3_READ = 3, + INTERCEPT_CR4_READ = 4, + INTERCEPT_CR8_READ = 8, + INTERCEPT_CR0_WRITE = 16, + INTERCEPT_CR3_WRITE = 16 + 3, + INTERCEPT_CR4_WRITE = 16 + 4, + INTERCEPT_CR8_WRITE = 16 + 8, + /* Byte offset 004h (word 1) */ + INTERCEPT_DR0_READ = 32, + INTERCEPT_DR1_READ, + INTERCEPT_DR2_READ, + INTERCEPT_DR3_READ, + INTERCEPT_DR4_READ, + INTERCEPT_DR5_READ, + INTERCEPT_DR6_READ, + INTERCEPT_DR7_READ, + INTERCEPT_DR0_WRITE = 48, + INTERCEPT_DR1_WRITE, + INTERCEPT_DR2_WRITE, + INTERCEPT_DR3_WRITE, + INTERCEPT_DR4_WRITE, + INTERCEPT_DR5_WRITE, + INTERCEPT_DR6_WRITE, + INTERCEPT_DR7_WRITE, + /* Byte offset 008h (word 2) */ + INTERCEPT_EXCEPTION_OFFSET = 64, + /* Byte offset 00Ch (word 3) */ + INTERCEPT_INTR = 96, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + /* Byte offset 010h (word 4) */ + INTERCEPT_VMRUN = 128, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, + INTERCEPT_RDPRU, + TRAP_EFER_WRITE, + TRAP_CR0_WRITE, + TRAP_CR1_WRITE, + TRAP_CR2_WRITE, + TRAP_CR3_WRITE, + TRAP_CR4_WRITE, + TRAP_CR5_WRITE, + TRAP_CR6_WRITE, + TRAP_CR7_WRITE, + TRAP_CR8_WRITE, + /* Byte offset 014h (word 5) */ + INTERCEPT_INVLPGB = 160, + INTERCEPT_INVLPGB_ILLEGAL, + INTERCEPT_INVPCID, + INTERCEPT_MCOMMIT, + INTERCEPT_TLBSYNC, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u32 intercepts[MAX_INTERCEPT]; + u32 reserved_1[15 - MAX_INTERCEPT]; + u16 pause_filter_thresh; + u16 pause_filter_count; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u64 avic_vapic_bar; + u64 ghcb_gpa; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 virt_ext; + u32 clean; + u32 reserved_5; + u64 next_rip; + u8 insn_len; + u8 insn_bytes[15]; + u64 avic_backing_page; /* Offset 0xe0 */ + u8 reserved_6[8]; /* Offset 0xe8 */ + u64 avic_logical_id; /* Offset 0xf0 */ + u64 avic_physical_id; /* Offset 0xf8 */ + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[720]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. + */ + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 +#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + +#define AVIC_ENABLE_SHIFT 31 +#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) + +#define X2APIC_MODE_SHIFT 30 +#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT) + +#define LBR_CTL_ENABLE_MASK BIT_ULL(0) +#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) + +#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0) +#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1) + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +#define SVM_NESTED_CTL_NP_ENABLE BIT(0) +#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) + + +#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL +#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL +#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL +#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL + + +/* AVIC */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) + +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, +}; + +#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) + +/* + * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as + * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually. + */ +#define AVIC_MAX_PHYSICAL_ID 0XFEULL + +/* + * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511). + */ +#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL + +static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID); + +#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL + + +struct vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +} __packed; + +/* Save area definition for legacy and SEV-MEM guests */ +struct vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[42]; + u8 vmpl; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u64 s_cet; + u64 ssp; + u64 isst_addr; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_5[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; + u8 reserved_6[72]; + u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ +} __packed; + +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u64 vmpl0_ssp; + u64 vmpl1_ssp; + u64 vmpl2_ssp; + u64 vmpl3_ssp; + u64 u_cet; + u8 reserved_1[2]; + u8 vmpl; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[104]; + u64 xss; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u64 dr0; + u64 dr1; + u64 dr2; + u64 dr3; + u64 dr0_addr_mask; + u64 dr1_addr_mask; + u64 dr2_addr_mask; + u64 dr3_addr_mask; + u8 reserved_4[24]; + u64 rsp; + u64 s_cet; + u64 ssp; + u64 isst_addr; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_5[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; + u8 reserved_7[80]; + u32 pkru; + u8 reserved_8[20]; + u64 reserved_9; /* rax already available at 0x01f8 */ + u64 rcx; + u64 rdx; + u64 rbx; + u64 reserved_10; /* rsp already available at 0x01d8 */ + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_11[16]; + u64 guest_exit_info_1; + u64 guest_exit_info_2; + u64 guest_exit_int_info; + u64 guest_nrip; + u64 sev_features; + u64 vintr_ctrl; + u64 guest_exit_code; + u64 virtual_tom; + u64 tlb_id; + u64 pcpu_id; + u64 event_inj; + u64 xcr0; + u8 reserved_12[16]; + + /* Floating point area */ + u64 x87_dp; + u32 mxcsr; + u16 x87_ftw; + u16 x87_fsw; + u16 x87_fcw; + u16 x87_fop; + u16 x87_ds; + u16 x87_cs; + u64 x87_rip; + u8 fpreg_x87[80]; + u8 fpreg_xmm[256]; + u8 fpreg_ymm[256]; +} __packed; + +struct ghcb_save_area { + u8 reserved_1[203]; + u8 cpl; + u8 reserved_2[116]; + u64 xss; + u8 reserved_3[24]; + u64 dr7; + u8 reserved_4[16]; + u64 rip; + u8 reserved_5[88]; + u64 rsp; + u8 reserved_6[24]; + u64 rax; + u8 reserved_7[264]; + u64 rcx; + u64 rdx; + u64 rbx; + u8 reserved_8[8]; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_9[16]; + u64 sw_exit_code; + u64 sw_exit_info_1; + u64 sw_exit_info_2; + u64 sw_scratch; + u8 reserved_10[56]; + u64 xcr0; + u8 valid_bitmap[16]; + u64 x87_state_gpa; +} __packed; + +#define GHCB_SHARED_BUF_SIZE 2032 + +struct ghcb { + struct ghcb_save_area save; + u8 reserved_save[2048 - sizeof(struct ghcb_save_area)]; + + u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; + + u8 reserved_1[10]; + u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ + u32 ghcb_usage; +} __packed; + + +#define EXPECTED_VMCB_SAVE_AREA_SIZE 740 +#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 +#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 +#define EXPECTED_GHCB_SIZE PAGE_SIZE + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); +} + +struct vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +} __packed; + +#define SVM_CPUID_FUNC 0x8000000a + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 +#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 + +#define SVM_EXITINFO_REG_MASK 0x0F + +#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) + +/* GHCB Accessor functions */ + +#define GHCB_BITMAP_IDX(field) \ + (offsetof(struct ghcb_save_area, field) / sizeof(u64)) + +#define DEFINE_GHCB_ACCESSORS(field) \ + static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ + { \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + } \ + \ + static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ + static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ + { \ + __set_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + ghcb->save.field = value; \ + } + +DEFINE_GHCB_ACCESSORS(cpl) +DEFINE_GHCB_ACCESSORS(rip) +DEFINE_GHCB_ACCESSORS(rsp) +DEFINE_GHCB_ACCESSORS(rax) +DEFINE_GHCB_ACCESSORS(rcx) +DEFINE_GHCB_ACCESSORS(rdx) +DEFINE_GHCB_ACCESSORS(rbx) +DEFINE_GHCB_ACCESSORS(rbp) +DEFINE_GHCB_ACCESSORS(rsi) +DEFINE_GHCB_ACCESSORS(rdi) +DEFINE_GHCB_ACCESSORS(r8) +DEFINE_GHCB_ACCESSORS(r9) +DEFINE_GHCB_ACCESSORS(r10) +DEFINE_GHCB_ACCESSORS(r11) +DEFINE_GHCB_ACCESSORS(r12) +DEFINE_GHCB_ACCESSORS(r13) +DEFINE_GHCB_ACCESSORS(r14) +DEFINE_GHCB_ACCESSORS(r15) +DEFINE_GHCB_ACCESSORS(sw_exit_code) +DEFINE_GHCB_ACCESSORS(sw_exit_info_1) +DEFINE_GHCB_ACCESSORS(sw_exit_info_2) +DEFINE_GHCB_ACCESSORS(sw_scratch) +DEFINE_GHCB_ACCESSORS(xcr0) + +#endif diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h new file mode 100644 index 000000000..c08eb0fdd --- /dev/null +++ b/arch/x86/include/asm/switch_to.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SWITCH_TO_H +#define _ASM_X86_SWITCH_TO_H + +#include + +struct task_struct; /* one of the stranger aspects of C forward declarations */ + +struct task_struct *__switch_to_asm(struct task_struct *prev, + struct task_struct *next); + +__visible struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); + +asmlinkage void ret_from_fork(void); + +/* + * This is the structure pointed to by thread.sp for an inactive task. The + * order of the fields must match the code in __switch_to_asm(). + */ +struct inactive_task_frame { +#ifdef CONFIG_X86_64 + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; +#else + unsigned long flags; + unsigned long si; + unsigned long di; +#endif + unsigned long bx; + + /* + * These two fields must be together. They form a stack frame header, + * needed by get_frame_pointer(). + */ + unsigned long bp; + unsigned long ret_addr; +}; + +struct fork_frame { + struct inactive_task_frame frame; + struct pt_regs regs; +}; + +#define switch_to(prev, next, last) \ +do { \ + ((last) = __switch_to_asm((prev), (next))); \ +} while (0) + +#ifdef CONFIG_X86_32 +static inline void refresh_sysenter_cs(struct thread_struct *thread) +{ + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) + return; + + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); +} +#endif + +/* This is used when switching tasks or entering/exiting vm86 mode. */ +static inline void update_task_stack(struct task_struct *task) +{ + /* sp0 always points to the entry trampoline stack, which is constant: */ +#ifdef CONFIG_X86_32 + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task->thread.sp0); + else + this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); +#else + /* Xen PV enters the kernel on the thread stack. */ + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task_top_of_stack(task)); +#endif +} + +static inline void kthread_frame_init(struct inactive_task_frame *frame, + int (*fun)(void *), void *arg) +{ + frame->bx = (unsigned long)fun; +#ifdef CONFIG_X86_32 + frame->di = (unsigned long)arg; +#else + frame->r12 = (unsigned long)arg; +#endif +} + +#endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h new file mode 100644 index 000000000..6d8d6bc18 --- /dev/null +++ b/arch/x86/include/asm/sync_bitops.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SYNC_BITOPS_H +#define _ASM_X86_SYNC_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#include + +#define ADDR (*(volatile long *)addr) + +/** + * sync_set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_set_bit(long nr, volatile unsigned long *addr) +{ + asm volatile("lock; " __ASM_SIZE(bts) " %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * sync_clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() + * in order to ensure changes are visible on other processors. + */ +static inline void sync_clear_bit(long nr, volatile unsigned long *addr) +{ + asm volatile("lock; " __ASM_SIZE(btr) " %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * sync_change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_change_bit(long nr, volatile unsigned long *addr) +{ + asm volatile("lock; " __ASM_SIZE(btc) " %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr); +} + +/** + * sync_test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr); +} + +/** + * sync_test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) +{ + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr); +} + +#define sync_test_bit(nr, addr) test_bit(nr, addr) + +#undef ADDR + +#endif /* _ASM_X86_SYNC_BITOPS_H */ diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h new file mode 100644 index 000000000..ab7382f92 --- /dev/null +++ b/arch/x86/include/asm/sync_core.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SYNC_CORE_H +#define _ASM_X86_SYNC_CORE_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +static inline void iret_to_self(void) +{ + asm volatile ( + "pushfl\n\t" + "pushl %%cs\n\t" + "pushl $1f\n\t" + "iret\n\t" + "1:" + : ASM_CALL_CONSTRAINT : : "memory"); +} +#else +static inline void iret_to_self(void) +{ + unsigned int tmp; + + asm volatile ( + "mov %%ss, %0\n\t" + "pushq %q0\n\t" + "pushq %%rsp\n\t" + "addq $8, (%%rsp)\n\t" + "pushfq\n\t" + "mov %%cs, %0\n\t" + "pushq %q0\n\t" + "pushq $1f\n\t" + "iretq\n\t" + "1:" + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); +} +#endif /* CONFIG_X86_32 */ + +/* + * This function forces the icache and prefetched instruction stream to + * catch up with reality in two very specific cases: + * + * a) Text was modified using one virtual address and is about to be executed + * from the same physical page at a different virtual address. + * + * b) Text was modified on a different CPU, may subsequently be + * executed on this CPU, and you want to make sure the new version + * gets executed. This generally means you're calling this in an IPI. + * + * If you're calling this for a different reason, you're probably doing + * it wrong. + * + * Like all of Linux's memory ordering operations, this is a + * compiler barrier as well. + */ +static inline void sync_core(void) +{ + /* + * The SERIALIZE instruction is the most straightforward way to + * do this, but it is not universally available. + */ + if (static_cpu_has(X86_FEATURE_SERIALIZE)) { + serialize(); + return; + } + + /* + * For all other processors, there are quite a few ways to do this. + * IRET-to-self is nice because it works on every CPU, at any CPL + * (so it's compatible with paravirtualization), and it never exits + * to a hypervisor. The only downsides are that it's a bit slow + * (it seems to be a bit more than 2x slower than the fastest + * options) and that it unmasks NMIs. The "push %cs" is needed, + * because in paravirtual environments __KERNEL_CS may not be a + * valid CS value when we do IRET directly. + * + * In case NMI unmasking or performance ever becomes a problem, + * the next best option appears to be MOV-to-CR2 and an + * unconditional jump. That sequence also works on all CPUs, + * but it will fault at CPL3 (i.e. Xen PV). + * + * CPUID is the conventional way, but it's nasty: it doesn't + * exist on some 486-like CPUs, and it usually exits to a + * hypervisor. + */ + iret_to_self(); +} + +/* + * Ensure that a core serializing instruction is issued before returning + * to user-mode. x86 implements return to user-space through sysexit, + * sysrel, and sysretq, which are not core serializing. + */ +static inline void sync_core_before_usermode(void) +{ + /* With PTI, we unconditionally serialize before running user code. */ + if (static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. + */ + sync_core(); +} + +#endif /* _ASM_X86_SYNC_CORE_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h new file mode 100644 index 000000000..5b85987a5 --- /dev/null +++ b/arch/x86/include/asm/syscall.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_X86_SYSCALL_H +#define _ASM_X86_SYSCALL_H + +#include +#include +#include +#include /* for TS_COMPAT */ +#include + +typedef long (*sys_call_ptr_t)(const struct pt_regs *); +extern const sys_call_ptr_t sys_call_table[]; + +#if defined(CONFIG_X86_32) +#define ia32_sys_call_table sys_call_table +#else +/* + * These may not exist, but still put the prototypes in so we + * can use IS_ENABLED(). + */ +extern const sys_call_ptr_t ia32_sys_call_table[]; +extern const sys_call_ptr_t x32_sys_call_table[]; +#endif + +/* + * Only the low 32 bits of orig_ax are meaningful, so we return int. + * This importantly ignores the high bits on 64-bit, so comparisons + * sign-extend the low 32 bits. + */ +static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) +{ + return regs->orig_ax; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->ax = regs->orig_ax; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->ax; +#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long) (int) error; +#endif + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->ax; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->ax = (long) error ?: val; +} + +#ifdef CONFIG_X86_32 + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + memcpy(args, ®s->bx, 6 * sizeof(args[0])); +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_I386; +} + +#else /* CONFIG_X86_64 */ + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task->thread_info.status & TS_COMPAT) { + *args++ = regs->bx; + *args++ = regs->cx; + *args++ = regs->dx; + *args++ = regs->si; + *args++ = regs->di; + *args = regs->bp; + } else +# endif + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; +} + +void do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_syscall_32(struct pt_regs *regs); +long do_fast_syscall_32(struct pt_regs *regs); + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h new file mode 100644 index 000000000..e3323a9dc --- /dev/null +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - x86 specific wrappers to syscall definitions + */ + +#ifndef _ASM_X86_SYSCALL_WRAPPER_H +#define _ASM_X86_SYSCALL_WRAPPER_H + +#include + +extern long __x64_sys_ni_syscall(const struct pt_regs *regs); +extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes + * struct pt_regs *regs as the only argument of the syscall stub(s) named as: + * __x64_sys_*() - 64-bit native syscall + * __ia32_sys_*() - 32-bit native syscall or common compat syscall + * __ia32_compat_sys_*() - 32-bit compat syscall + * __x64_compat_sys_*() - 64-bit X32 compat syscall + * + * The registers are decoded according to the ABI: + * 64-bit: RDI, RSI, RDX, R10, R8, R9 + * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP + * + * The stub then passes the decoded arguments to the __se_sys_*() wrapper to + * perform sign-extension (omitted for zero-argument syscalls). Finally the + * arguments are passed to the __do_sys_*() function which is the actual + * syscall. These wrappers are marked as inline so the compiler can optimize + * the functions where appropriate. + * + * Example assembly (slightly re-ordered for better readability): + * + * <__x64_sys_recv>: <-- syscall with 4 parameters + * callq <__fentry__> + * + * mov 0x70(%rdi),%rdi <-- decode regs->di + * mov 0x68(%rdi),%rsi <-- decode regs->si + * mov 0x60(%rdi),%rdx <-- decode regs->dx + * mov 0x38(%rdi),%rcx <-- decode regs->r10 + * + * xor %r9d,%r9d <-- clear %r9 + * xor %r8d,%r8d <-- clear %r8 + * + * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() + * which takes 6 arguments + * + * cltq <-- extend return value to 64-bit + * retq <-- return + * + * This approach avoids leaking random user-provided register content down + * the call chain. + */ + +/* Mapping of registers to parameters for syscalls on x86-64 and x32 */ +#define SC_X86_64_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9) \ + + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + +/* Mapping of registers to parameters for syscalls on i386 */ +#define SC_IA32_REGS_TO_ARGS(x, ...) \ + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ + +#define __SYS_STUB0(abi, name) \ + long __##abi##_##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \ + long __##abi##_##name(const struct pt_regs *regs) \ + __alias(__do_##name); + +#define __SYS_STUBx(abi, name, ...) \ + long __##abi##_##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \ + long __##abi##_##name(const struct pt_regs *regs) \ + { \ + return __se_##name(__VA_ARGS__); \ + } + +#define __COND_SYSCALL(abi, name) \ + __weak long __##abi##_##name(const struct pt_regs *__unused); \ + __weak long __##abi##_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } + +#define __SYS_NI(abi, name) \ + SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); + +#ifdef CONFIG_X86_64 +#define __X64_SYS_STUB0(name) \ + __SYS_STUB0(x64, sys_##name) + +#define __X64_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(x64, sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X64_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, sys_##name) + +#define __X64_SYS_NI(name) \ + __SYS_NI(x64, sys_##name) +#else /* CONFIG_X86_64 */ +#define __X64_SYS_STUB0(name) +#define __X64_SYS_STUBx(x, name, ...) +#define __X64_COND_SYSCALL(name) +#define __X64_SYS_NI(name) +#endif /* CONFIG_X86_64 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#define __IA32_SYS_STUB0(name) \ + __SYS_STUB0(ia32, sys_##name) + +#define __IA32_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __IA32_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, sys_##name) + +#define __IA32_SYS_NI(name) \ + __SYS_NI(ia32, sys_##name) +#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ +#define __IA32_SYS_STUB0(name) +#define __IA32_SYS_STUBx(x, name, ...) +#define __IA32_COND_SYSCALL(name) +#define __IA32_SYS_NI(name) +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_IA32_EMULATION +/* + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this + * case as well. + */ +#define __IA32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(ia32, compat_sys_##name) + +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, compat_sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __IA32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, compat_sys_##name) + +#define __IA32_COMPAT_SYS_NI(name) \ + __SYS_NI(ia32, compat_sys_##name) + +#else /* CONFIG_IA32_EMULATION */ +#define __IA32_COMPAT_SYS_STUB0(name) +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) +#define __IA32_COMPAT_COND_SYSCALL(name) +#define __IA32_COMPAT_SYS_NI(name) +#endif /* CONFIG_IA32_EMULATION */ + + +#ifdef CONFIG_X86_X32_ABI +/* + * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware + * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common + * with x86_64 obviously do not need such care. + */ +#define __X32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(x64, compat_sys_##name) + +#define __X32_COMPAT_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(x64, compat_sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, compat_sys_##name) + +#define __X32_COMPAT_SYS_NI(name) \ + __SYS_NI(x64, compat_sys_##name) +#else /* CONFIG_X86_X32_ABI */ +#define __X32_COMPAT_SYS_STUB0(name) +#define __X32_COMPAT_SYS_STUBx(x, name, ...) +#define __X32_COMPAT_COND_SYSCALL(name) +#define __X32_COMPAT_SYS_NI(name) +#endif /* CONFIG_X86_X32_ABI */ + + +#ifdef CONFIG_COMPAT +/* + * Compat means IA32_EMULATION and/or X86_X32. As they use a different + * mapping of registers to parameters, we need to generate stubs for each + * of them. + */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused); \ + __IA32_COMPAT_SYS_STUB0(name) \ + __X32_COMPAT_SYS_STUB0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused) + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + __IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ + __X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + } \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As some compat syscalls may not be implemented, we need to expand + * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in + * kernel/time/posix-stubs.c to cover this case as well. + */ +#define COND_SYSCALL_COMPAT(name) \ + __IA32_COMPAT_COND_SYSCALL(name) \ + __X32_COMPAT_COND_SYSCALL(name) + +#define COMPAT_SYS_NI(name) \ + __IA32_COMPAT_SYS_NI(name) \ + __X32_COMPAT_SYS_NI(name) + +#endif /* CONFIG_COMPAT */ + +#define __SYSCALL_DEFINEx(x, name, ...) \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + __X64_SYS_STUBx(x, name, __VA_ARGS__) \ + __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for + * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not + * hurt, we only need to re-define it here to keep the naming congruent to + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() + * macros to work correctly. + */ +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + static long __do_sys_##sname(const struct pt_regs *__unused); \ + __X64_SYS_STUB0(sname) \ + __IA32_SYS_STUB0(sname) \ + static long __do_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + __X64_COND_SYSCALL(name) \ + __IA32_COND_SYSCALL(name) + +#define SYS_NI(name) \ + __X64_SYS_NI(name) \ + __IA32_SYS_NI(name) + + +/* + * For VSYSCALLS, we need to declare these three syscalls with the new + * pt_regs-based calling convention for in-kernel use. + */ +long __x64_sys_getcpu(const struct pt_regs *regs); +long __x64_sys_gettimeofday(const struct pt_regs *regs); +long __x64_sys_time(const struct pt_regs *regs); + +#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h new file mode 100644 index 000000000..6714a3582 --- /dev/null +++ b/arch/x86/include/asm/syscalls.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh Rajput + */ + +#ifndef _ASM_X86_SYSCALLS_H +#define _ASM_X86_SYSCALLS_H + +/* Common in X86_32 and X86_64 */ +/* kernel/ioport.c */ +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); + +#endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h new file mode 100644 index 000000000..020c81a7c --- /dev/null +++ b/arch/x86/include/asm/tdx.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2022 Intel Corporation */ +#ifndef _ASM_X86_TDX_H +#define _ASM_X86_TDX_H + +#include +#include +#include +#include + +/* + * SW-defined error codes. + * + * Bits 47:40 == 0xFF indicate Reserved status code class that never used by + * TDX module. + */ +#define TDX_ERROR _BITUL(63) +#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40)) +#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000)) + +#ifndef __ASSEMBLY__ + +/* + * Used to gather the output registers values of the TDCALL and SEAMCALL + * instructions when requesting services from the TDX module. + * + * This is a software only structure and not part of the TDX module/VMM ABI. + */ +struct tdx_module_output { + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 r10; + u64 r11; +}; + +/* + * Used by the #VE exception handler to gather the #VE exception + * info from the TDX module. This is a software only structure + * and not part of the TDX module/VMM ABI. + */ +struct ve_info { + u64 exit_reason; + u64 exit_qual; + /* Guest Linear (virtual) Address */ + u64 gla; + /* Guest Physical Address */ + u64 gpa; + u32 instr_len; + u32 instr_info; +}; + +#ifdef CONFIG_INTEL_TDX_GUEST + +void __init tdx_early_init(void); + +/* Used to communicate with the TDX module */ +u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out); + +void tdx_get_ve_info(struct ve_info *ve); + +bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); + +void tdx_safe_halt(void); + +bool tdx_early_handle_ve(struct pt_regs *regs); + +#else + +static inline void tdx_early_init(void) { }; +static inline void tdx_safe_halt(void) { }; + +static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } + +#endif /* CONFIG_INTEL_TDX_GUEST */ + +#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST) +long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4); +#else +static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + return -ENODEV; +} +#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */ +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h new file mode 100644 index 000000000..85b85a275 --- /dev/null +++ b/arch/x86/include/asm/text-patching.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TEXT_PATCHING_H +#define _ASM_X86_TEXT_PATCHING_H + +#include +#include +#include + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +/* + * Currently, the max observed size in the kernel code is + * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. + * Raise it if needed. + */ +#define POKE_MAX_OPCODE_SIZE 5 + +extern void text_poke_early(void *addr, const void *opcode, size_t len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * On the local CPU you need to be protected against NMI or MCE handlers seeing + * an inconsistent instruction while you patch. + */ +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void text_poke_sync(void); +extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +extern void *text_poke_set(void *addr, int c, size_t len); +extern int poke_int3_handler(struct pt_regs *regs); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); + +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate); +extern void text_poke_finish(void); + +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define RET_INSN_SIZE 1 +#define RET_INSN_OPCODE 0xC3 + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB + +#define DISP32_SIZE 4 + +static __always_inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(RET); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) +{ + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); + + insn->opcode = opcode; + + if (size > 1) { + insn->disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP8 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); + } + } +} + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); + return &insn.text; +} + +extern int after_bootmem; +extern __ro_after_init struct mm_struct *poking_mm; +extern __ro_after_init unsigned long poking_addr; + +#ifndef CONFIG_UML_X86 +static __always_inline +void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +static __always_inline +void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The int3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's create_gap option. + * + * Similarly entry_32.S will have a gap on the stack for (any) hardware + * exception and pt_regs; see FIXUP_FRAME. + */ + regs->sp -= sizeof(unsigned long); + *(unsigned long *)regs->sp = val; +} + +static __always_inline +unsigned long int3_emulate_pop(struct pt_regs *regs) +{ + unsigned long val = *(unsigned long *)regs->sp; + regs->sp += sizeof(unsigned long); + return val; +} + +static __always_inline +void int3_emulate_call(struct pt_regs *regs, unsigned long func) +{ + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); + int3_emulate_jmp(regs, func); +} + +static __always_inline +void int3_emulate_ret(struct pt_regs *regs) +{ + unsigned long ip = int3_emulate_pop(regs); + int3_emulate_jmp(regs, ip); +} + +static __always_inline +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +{ + static const unsigned long jcc_mask[6] = { + [0] = X86_EFLAGS_OF, + [1] = X86_EFLAGS_CF, + [2] = X86_EFLAGS_ZF, + [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, + [4] = X86_EFLAGS_SF, + [5] = X86_EFLAGS_PF, + }; + + bool invert = cc & 1; + bool match; + + if (cc < 0xc) { + match = regs->flags & jcc_mask[cc >> 1]; + } else { + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + if (cc >= 0xe) + match = match || (regs->flags & X86_EFLAGS_ZF); + } + + if ((match && !invert) || (!match && invert)) + ip += disp; + + int3_emulate_jmp(regs, ip); +} + +#endif /* !CONFIG_UML_X86 */ + +#endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h new file mode 100644 index 000000000..91a7b6687 --- /dev/null +++ b/arch/x86/include/asm/thermal.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_THERMAL_H +#define _ASM_X86_THERMAL_H + +#ifdef CONFIG_X86_THERMAL_VECTOR +void therm_lvt_init(void); +void intel_init_thermal(struct cpuinfo_x86 *c); +bool x86_thermal_enabled(void); +void intel_thermal_interrupt(void); +#else +static inline void therm_lvt_init(void) { } +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +#endif + +#endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h new file mode 100644 index 000000000..f0cb881c1 --- /dev/null +++ b/arch/x86/include/asm/thread_info.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* thread_info.h: low-level thread information + * + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + +#ifndef _ASM_X86_THREAD_INFO_H +#define _ASM_X86_THREAD_INFO_H + +#include +#include +#include +#include + +/* + * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we + * reserve at the top of the kernel stack. We do it because of a nasty + * 32-bit corner case. On x86_32, the hardware stack frame is + * variable-length. Except for vm86 mode, struct pt_regs assumes a + * maximum-length frame. If we enter from CPL 0, the top 8 bytes of + * pt_regs don't actually exist. Ordinarily this doesn't matter, but it + * does in at least one case: + * + * If we take an NMI early enough in SYSENTER, then we can end up with + * pt_regs that extends above sp0. On the way out, in the espfix code, + * we can read the saved SS value, but that value will be above sp0. + * Without this offset, that can result in a page fault. (We are + * careful that, in this case, the value we read doesn't matter.) + * + * In vm86 mode, the hardware frame is much longer still, so add 16 + * bytes to make room for the real-mode segments. + * + * x86_64 has a fixed-length stack frame. + */ +#ifdef CONFIG_X86_32 +# ifdef CONFIG_VM86 +# define TOP_OF_KERNEL_STACK_PADDING 16 +# else +# define TOP_OF_KERNEL_STACK_PADDING 8 +# endif +#else +# define TOP_OF_KERNEL_STACK_PADDING 0 +#endif + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + */ +#ifndef __ASSEMBLY__ +struct task_struct; +#include +#include + +struct thread_info { + unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + u32 status; /* thread synchronous flags */ +#ifdef CONFIG_SMP + u32 cpu; /* current CPU */ +#endif +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} + +#else /* !__ASSEMBLY__ */ + +#include + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Speculative store bypass disable */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ +#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ +#define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_PATCH_PENDING 13 /* pending live patching update */ +#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ +#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ +#define TIF_NOTSC 16 /* TSC is not accessible in userland */ +#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ +#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ +#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ +#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ + +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH) +#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) +#define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) +#define _TIF_NOCPUID (1 << TIF_NOCPUID) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) +#define _TIF_ADDR32 (1 << TIF_ADDR32) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif + +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + +#define STACK_WARN (THREAD_SIZE/8) + +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + +/* + * Walks up the stack frames to make sure that the specified object is + * entirely contained by a single stack frame. + * + * Returns: + * GOOD_FRAME if within a frame + * BAD_STACK if placed across a frame boundary (or outside stack) + * NOT_STACK unable to determine (no frame pointers, etc) + */ +static inline int arch_within_stack_frames(const void * const stack, + const void * const stackend, + const void *obj, unsigned long len) +{ +#if defined(CONFIG_FRAME_POINTER) + const void *frame = NULL; + const void *oldframe; + + oldframe = __builtin_frame_address(1); + if (oldframe) + frame = __builtin_frame_address(2); + /* + * low ----------------------------------------------> high + * [saved bp][saved ip][args][local vars][saved bp][saved ip] + * ^----------------^ + * allow copies only within here + */ + while (stack <= frame && frame < stackend) { + /* + * If obj + len extends past the last frame, this + * check won't pass and the next frame will be 0, + * causing us to bail out and correctly report + * the copy as invalid. + */ + if (obj + len <= frame) + return obj >= oldframe + 2 * sizeof(void *) ? + GOOD_FRAME : BAD_STACK; + oldframe = frame; + frame = *(const void * const *)frame; + } + return BAD_STACK; +#else + return NOT_STACK; +#endif +} + +#endif /* !__ASSEMBLY__ */ + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_COMPAT +#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ + +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) + +#endif + +#ifdef CONFIG_X86_32 +#define in_ia32_syscall() true +#else +#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ + current_thread_info()->status & TS_COMPAT) +#endif + +extern void arch_task_cache_init(void); +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +extern void arch_release_task_struct(struct task_struct *tsk); +extern void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h new file mode 100644 index 000000000..8ac563abb --- /dev/null +++ b/arch/x86/include/asm/time.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TIME_H +#define _ASM_X86_TIME_H + +#include +#include + +extern void hpet_time_init(void); +extern void time_init(void); +extern bool pit_timer_init(void); + +extern struct clock_event_device *global_clock_event; + +#endif /* _ASM_X86_TIME_H */ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h new file mode 100644 index 000000000..7365dd4ac --- /dev/null +++ b/arch/x86/include/asm/timer.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TIMER_H +#define _ASM_X86_TIMER_H +#include +#include +#include +#include + +#define TICK_SIZE (tick_nsec / 1000) + +unsigned long long native_sched_clock(void); +extern void recalibrate_cpu_khz(void); + +extern int no_timer_check; + +extern bool using_native_sched_clock(void); + +/* + * We use the full linear equation: f(x) = a + b*x, in order to allow + * a continuous function in the face of dynamic freq changes. + * + * Continuity means that when our frequency changes our slope (b); we want to + * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t. + * + * Without an offset (a) the above would not be possible. + * + * See the comment near cycles_2_ns() for details on how we compute (b). + */ +struct cyc2ns_data { + u32 cyc2ns_mul; + u32 cyc2ns_shift; + u64 cyc2ns_offset; +}; /* 16 bytes */ + +extern void cyc2ns_read_begin(struct cyc2ns_data *); +extern void cyc2ns_read_end(void); + +#endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h new file mode 100644 index 000000000..956e41453 --- /dev/null +++ b/arch/x86/include/asm/timex.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TIMEX_H +#define _ASM_X86_TIMEX_H + +#include +#include + +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + +/* Assume we use the PIT time source for the clock tick */ +#define CLOCK_TICK_RATE PIT_TICK_RATE + +#define ARCH_HAS_READ_CURRENT_TIMER + +#endif /* _ASM_X86_TIMEX_H */ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h new file mode 100644 index 000000000..580636cdc --- /dev/null +++ b/arch/x86/include/asm/tlb.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TLB_H +#define _ASM_X86_TLB_H + +#define tlb_flush tlb_flush +static inline void tlb_flush(struct mmu_gather *tlb); + +#include + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + unsigned long start = 0UL, end = TLB_FLUSH_ALL; + unsigned int stride_shift = tlb_get_unmap_shift(tlb); + + if (!tlb->fullmm && !tlb->need_flush_all) { + start = tlb->start; + end = tlb->end; + } + + flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables); +} + +/* + * While x86 architecture in general requires an IPI to perform TLB + * shootdown, enablement code for several hypervisors overrides + * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing + * a hypercall. To keep software pagetable walkers safe in this case we + * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment + * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. + */ +static inline void __tlb_remove_table(void *table) +{ + free_page_and_swap_cache(table); +} + +#endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h new file mode 100644 index 000000000..1ad56eb3e --- /dev/null +++ b/arch/x86/include/asm/tlbbatch.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_X86_TLBBATCH_H +#define _ARCH_X86_TLBBATCH_H + +#include + +struct arch_tlbflush_unmap_batch { + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed.. + */ + struct cpumask cpumask; +}; + +#endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h new file mode 100644 index 000000000..cda3118f3 --- /dev/null +++ b/arch/x86/include/asm/tlbflush.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TLBFLUSH_H +#define _ASM_X86_TLBFLUSH_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +void __flush_tlb_all(void); + +#define TLB_FLUSH_ALL -1UL +#define TLB_GENERATION_INVALID 0 + +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); + +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits_irqsoff(unsigned long mask) +{ + cr4_update_irqsoff(mask, 0); +} + +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits_irqsoff(unsigned long mask) +{ + cr4_update_irqsoff(0, mask); +} + +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); +} + +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); +} + +#ifndef MODULE +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 + +struct tlb_context { + u64 ctx_id; + u64 tlb_gen; +}; + +struct tlb_state { + /* + * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts + * are on. This means that it may not match current->active_mm, + * which will contain the previous user mm when we're in lazy TLB + * mode even if we've already switched back to swapper_pg_dir. + * + * During switch_mm_irqs_off(), loaded_mm will be set to + * LOADED_MM_SWITCHING during the brief interrupts-off window + * when CR3 and loaded_mm would otherwise be inconsistent. This + * is for nmi_uaccess_okay()'s benefit. + */ + struct mm_struct *loaded_mm; + +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) + + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_spec; + }; + + u16 loaded_mm_asid; + u16 next_asid; + + /* + * If set we changed the page tables in such a way that we + * needed an invalidation of all contexts (aka. PCIDs / ASIDs). + * This tells us to go invalidate all the non-loaded ctxs[] + * on the next context switch. + * + * The current ctx was kept up-to-date as it ran and does not + * need to be invalidated. + */ + bool invalidate_other; + + /* + * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate + * the corresponding user PCID needs a flush next time we + * switch to it; see SWITCH_TO_USER_CR3. + */ + unsigned short user_pcid_flush_mask; + + /* + * Access to this CR4 shadow and to H/W CR4 is protected by + * disabling interrupts when modifying either one. + */ + unsigned long cr4; + + /* + * This is a list of all contexts that might exist in the TLB. + * There is one per ASID that we use, and the ASID (what the + * CPU calls PCID) is the index into ctxts. + * + * For each context, ctx_id indicates which mm the TLB's user + * entries came from. As an invariant, the TLB will never + * contain entries that are out-of-date as when that mm reached + * the tlb_gen in the list. + * + * To be clear, this means that it's legal for the TLB code to + * flush the TLB without updating tlb_gen. This can happen + * (for now, at least) due to paravirt remote flushes. + * + * NB: context 0 is a bit special, since it's also used by + * various bits of init code. This is fine -- code that + * isn't aware of PCID will end up harmlessly flushing + * context 0. + */ + struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; +}; +DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate); + +struct tlb_state_shared { + /* + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; + * + * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. + */ + bool is_lazy; +}; +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); + +bool nmi_uaccess_okay(void); +#define nmi_uaccess_okay nmi_uaccess_okay + +/* Initialize cr4 shadow for this CPU. */ +static inline void cr4_init_shadow(void) +{ + this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); +} + +extern unsigned long mmu_cr4_features; +extern u32 *trampoline_cr4_features; + +extern void initialize_tlbstate_and_flush(void); + +/* + * TLB flushing: + * + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + */ +struct flush_tlb_info { + /* + * We support several kinds of flushes. + * + * - Fully flush a single mm. .mm will be set, .end will be + * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to + * which the IPI sender is trying to catch us up. + * + * - Partially flush a single mm. .mm will be set, .start and + * .end will indicate the range, and .new_tlb_gen will be set + * such that the changes between generation .new_tlb_gen-1 and + * .new_tlb_gen are entirely contained in the indicated range. + * + * - Fully flush all mms whose tlb_gens have been updated. .mm + * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen + * will be zero. + */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + u64 new_tlb_gen; + unsigned int initiating_cpu; + u8 stride_shift; + u8 freed_tables; +}; + +void flush_tlb_local(void); +void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +#ifdef CONFIG_PARAVIRT +#include +#endif + +#define flush_tlb_mm(mm) \ + flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) + +#define flush_tlb_range(vma, start, end) \ + flush_tlb_mm_range((vma)->vm_mm, start, end, \ + ((vma)->vm_flags & VM_HUGETLB) \ + ? huge_page_shift(hstate_vma(vma)) \ + : PAGE_SHIFT, false) + +extern void flush_tlb_all(void); +extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int stride_shift, + bool freed_tables); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) +{ + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); +} + +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) +{ + /* + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. + */ + return atomic64_inc_return(&mm->context.tlb_gen); +} + +static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm) +{ + inc_mm_tlb_gen(mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); +} + +extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + +static inline bool pte_flags_need_flush(unsigned long oldflags, + unsigned long newflags, + bool ignore_access) +{ + /* + * Flags that require a flush when cleared but not when they are set. + * Only include flags that would not trigger spurious page-faults. + * Non-present entries are not cached. Hardware would set the + * dirty/access bit if needed without a fault. + */ + const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | + _PAGE_ACCESSED; + const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | + _PAGE_SOFTW3 | _PAGE_SOFTW4; + const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | + _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | + _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | + _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX; + unsigned long diff = oldflags ^ newflags; + + BUILD_BUG_ON(flush_on_clear & software_flags); + BUILD_BUG_ON(flush_on_clear & flush_on_change); + BUILD_BUG_ON(flush_on_change & software_flags); + + /* Ignore software flags */ + diff &= ~software_flags; + + if (ignore_access) + diff &= ~_PAGE_ACCESSED; + + /* + * Did any of the 'flush_on_clear' flags was clleared set from between + * 'oldflags' and 'newflags'? + */ + if (diff & oldflags & flush_on_clear) + return true; + + /* Flush on modified flags. */ + if (diff & flush_on_change) + return true; + + /* Ensure there are no flags that were left behind */ + if (IS_ENABLED(CONFIG_DEBUG_VM) && + (diff & ~(flush_on_clear | software_flags | flush_on_change))) { + VM_WARN_ON_ONCE(1); + return true; + } + + return false; +} + +/* + * pte_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace PTEs. + */ +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pte_flags(oldpte) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pte_pfn(oldpte) != pte_pfn(newpte)) + return true; + + /* + * check PTE flags; ignore access-bit; see comment in + * ptep_clear_flush_young(). + */ + return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), + true); +} +#define pte_needs_flush pte_needs_flush + +/* + * huge_pmd_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace huge PMDs. + */ +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) + return true; + + /* + * check PMD flags; do not ignore access-bit; see + * pmdp_clear_flush_young(). + */ + return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), + false); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + +#endif /* !MODULE */ + +static inline void __native_tlb_flush_global(unsigned long cr4) +{ + native_write_cr4(cr4 ^ X86_CR4_PGE); + native_write_cr4(cr4); +} +#endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h new file mode 100644 index 000000000..458c891a8 --- /dev/null +++ b/arch/x86/include/asm/topology.h @@ -0,0 +1,230 @@ +/* + * Written by: Matthew Dobson, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ +#ifndef _ASM_X86_TOPOLOGY_H +#define _ASM_X86_TOPOLOGY_H + +/* + * to preserve the visibility of NUMA_NO_NODE definition, + * moved to there from here. May be used independent of + * CONFIG_NUMA. + */ +#include + +#ifdef CONFIG_NUMA +#include + +#include +#include + +/* Mappings between logical cpu number and node number */ +DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +/* + * override generic percpu implementation of cpu_to_node + */ +extern int __cpu_to_node(int cpu); +#define cpu_to_node __cpu_to_node + +extern int early_cpu_to_node(int cpu); + +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Same function but used if called before per_cpu areas are setup */ +static inline int early_cpu_to_node(int cpu) +{ + return early_per_cpu(x86_cpu_to_node_map, cpu); +} + +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Mappings between node number and cpus on that node. */ +extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +extern const struct cpumask *cpumask_of_node(int node); +#else +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return node_to_cpumask_map[node]; +} +#endif + +extern void setup_node_to_cpumask_map(void); + +#define pcibus_to_node(bus) __pcibus_to_node(bus) + +extern int __node_distance(int, int); +#define node_distance(a, b) __node_distance(a, b) + +#else /* !CONFIG_NUMA */ + +static inline int numa_node_id(void) +{ + return 0; +} +/* + * indicate override: + */ +#define numa_node_id numa_node_id + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} + +static inline void setup_node_to_cpumask_map(void) { } + +#endif + +#include + +extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_clustergroup_mask(int cpu); + +#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) +#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) + +extern unsigned int __max_die_per_package; + +#ifdef CONFIG_SMP +#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu)) +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) +#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) +#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +extern unsigned int __max_logical_packages; +#define topology_max_packages() (__max_logical_packages) + +static inline int topology_max_die_per_package(void) +{ + return __max_die_per_package; +} + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + +int topology_update_package_map(unsigned int apicid, unsigned int cpu); +int topology_update_die_map(unsigned int dieid, unsigned int cpu); +int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); +bool topology_is_primary_thread(unsigned int cpu); +bool topology_smt_supported(void); +#else +#define topology_max_packages() (1) +static inline int +topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int +topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_phys_to_logical_die(unsigned int die, + unsigned int cpu) { return 0; } +static inline int topology_max_die_per_package(void) { return 1; } +static inline int topology_max_smt_threads(void) { return 1; } +static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline bool topology_smt_supported(void) { return false; } +#endif + +static inline void arch_fix_phys_package_id(int num, u32 slot) +{ +} + +struct pci_bus; +int x86_pci_root_bus_node(int bus); +void x86_pci_root_bus_resources(int bus, struct list_head *resources); + +extern bool x86_topology_update; + +#ifdef CONFIG_SCHED_MC_PRIO +#include + +DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +extern unsigned int __read_mostly sysctl_sched_itmt_enabled; + +/* Interface to set priority of a cpu */ +void sched_set_itmt_core_prio(int prio, int core_cpu); + +/* Interface to notify scheduler that system supports ITMT */ +int sched_set_itmt_support(void); + +/* Interface to notify scheduler that system revokes ITMT support */ +void sched_clear_itmt_support(void); + +#else /* CONFIG_SCHED_MC_PRIO */ + +#define sysctl_sched_itmt_enabled 0 +static inline void sched_set_itmt_core_prio(int prio, int core_cpu) +{ +} +static inline int sched_set_itmt_support(void) +{ + return 0; +} +static inline void sched_clear_itmt_support(void) +{ +} +#endif /* CONFIG_SCHED_MC_PRIO */ + +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) +#include + +DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key); + +#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key) + +DECLARE_PER_CPU(unsigned long, arch_freq_scale); + +static inline long arch_scale_freq_capacity(int cpu) +{ + return per_cpu(arch_freq_scale, cpu); +} +#define arch_scale_freq_capacity arch_scale_freq_capacity + +extern void arch_set_max_freq_ratio(bool turbo_disabled); +extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); +#else +static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } +#endif + +extern void arch_scale_freq_tick(void); +#define arch_scale_freq_tick arch_scale_freq_tick + +#ifdef CONFIG_ACPI_CPPC_LIB +void init_freq_invariance_cppc(void); +#define arch_init_invariance_cppc init_freq_invariance_cppc +#endif + +#endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h new file mode 100644 index 000000000..f0f9bcdb7 --- /dev/null +++ b/arch/x86/include/asm/trace/common.h @@ -0,0 +1,12 @@ +#ifndef _ASM_TRACE_COMMON_H +#define _ASM_TRACE_COMMON_H + +#ifdef CONFIG_TRACING +DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); +#define trace_pagefault_enabled() \ + static_branch_unlikely(&trace_pagefault_key) +#else +static inline bool trace_pagefault_enabled(void) { return false; } +#endif + +#endif diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h new file mode 100644 index 000000000..6b1e87194 --- /dev/null +++ b/arch/x86/include/asm/trace/exceptions.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM exceptions + +#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_FAULT_H + +#include +#include + +extern int trace_pagefault_reg(void); +extern void trace_pagefault_unreg(void); + +DECLARE_EVENT_CLASS(x86_exceptions, + + TP_PROTO(unsigned long address, struct pt_regs *regs, + unsigned long error_code), + + TP_ARGS(address, regs, error_code), + + TP_STRUCT__entry( + __field( unsigned long, address ) + __field( unsigned long, ip ) + __field( unsigned long, error_code ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->ip = regs->ip; + __entry->error_code = error_code; + ), + + TP_printk("address=%ps ip=%ps error_code=0x%lx", + (void *)__entry->address, (void *)__entry->ip, + __entry->error_code) ); + +#define DEFINE_PAGE_FAULT_EVENT(name) \ +DEFINE_EVENT_FN(x86_exceptions, name, \ + TP_PROTO(unsigned long address, struct pt_regs *regs, \ + unsigned long error_code), \ + TP_ARGS(address, regs, error_code), \ + trace_pagefault_reg, trace_pagefault_unreg); + +DEFINE_PAGE_FAULT_EVENT(page_fault_user); +DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE exceptions +#endif /* _TRACE_PAGE_FAULT_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h new file mode 100644 index 000000000..4645a6334 --- /dev/null +++ b/arch/x86/include/asm/trace/fpu.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM x86_fpu + +#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FPU_H + +#include + +DECLARE_EVENT_CLASS(x86_fpu, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu), + + TP_STRUCT__entry( + __field(struct fpu *, fpu) + __field(bool, load_fpu) + __field(u64, xfeatures) + __field(u64, xcomp_bv) + ), + + TP_fast_assign( + __entry->fpu = fpu; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); + if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { + __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv; + } + ), + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", + __entry->fpu, + __entry->load_fpu, + __entry->xfeatures, + __entry->xcomp_bv + ) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_init_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_dropped, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fpu +#endif /* _TRACE_FPU_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h new file mode 100644 index 000000000..a8e5a7a2b --- /dev/null +++ b/arch/x86/include/asm/trace/hyperv.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hyperv + +#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HYPERV_H + +#include + +#if IS_ENABLED(CONFIG_HYPERV) + +TRACE_EVENT(hyperv_mmu_flush_tlb_multi, + TP_PROTO(const struct cpumask *cpus, + const struct flush_tlb_info *info), + TP_ARGS(cpus, info), + TP_STRUCT__entry( + __field(unsigned int, ncpus) + __field(struct mm_struct *, mm) + __field(unsigned long, addr) + __field(unsigned long, end) + ), + TP_fast_assign(__entry->ncpus = cpumask_weight(cpus); + __entry->mm = info->mm; + __entry->addr = info->start; + __entry->end = info->end; + ), + TP_printk("ncpus %d mm %p addr %lx, end %lx", + __entry->ncpus, __entry->mm, + __entry->addr, __entry->end) + ); + +TRACE_EVENT(hyperv_nested_flush_guest_mapping, + TP_PROTO(u64 as, int ret), + TP_ARGS(as, ret), + + TP_STRUCT__entry( + __field(u64, as) + __field(int, ret) + ), + TP_fast_assign(__entry->as = as; + __entry->ret = ret; + ), + TP_printk("address space %llx ret %d", __entry->as, __entry->ret) + ); + +TRACE_EVENT(hyperv_nested_flush_guest_mapping_range, + TP_PROTO(u64 as, int ret), + TP_ARGS(as, ret), + + TP_STRUCT__entry( + __field(u64, as) + __field(int, ret) + ), + TP_fast_assign(__entry->as = as; + __entry->ret = ret; + ), + TP_printk("address space %llx ret %d", __entry->as, __entry->ret) + ); + +TRACE_EVENT(hyperv_send_ipi_mask, + TP_PROTO(const struct cpumask *cpus, + int vector), + TP_ARGS(cpus, vector), + TP_STRUCT__entry( + __field(unsigned int, ncpus) + __field(int, vector) + ), + TP_fast_assign(__entry->ncpus = cpumask_weight(cpus); + __entry->vector = vector; + ), + TP_printk("ncpus %d vector %x", + __entry->ncpus, __entry->vector) + ); + +TRACE_EVENT(hyperv_send_ipi_one, + TP_PROTO(int cpu, + int vector), + TP_ARGS(cpu, vector), + TP_STRUCT__entry( + __field(int, cpu) + __field(int, vector) + ), + TP_fast_assign(__entry->cpu = cpu; + __entry->vector = vector; + ), + TP_printk("cpu %d vector %x", + __entry->cpu, __entry->vector) + ); + +#endif /* CONFIG_HYPERV */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hyperv +#endif /* _TRACE_HYPERV_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h new file mode 100644 index 000000000..88e7f0f3b --- /dev/null +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -0,0 +1,383 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq_vectors + +#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_VECTORS_H + +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC + +DECLARE_EVENT_CLASS(x86_irq_vector, + + TP_PROTO(int vector), + + TP_ARGS(vector), + + TP_STRUCT__entry( + __field( int, vector ) + ), + + TP_fast_assign( + __entry->vector = vector; + ), + + TP_printk("vector=%d", __entry->vector) ); + +#define DEFINE_IRQ_VECTOR_EVENT(name) \ +DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), NULL, NULL); \ +DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), NULL, NULL); + +/* + * local_timer - called when entering/exiting a local timer interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(local_timer); + +/* + * spurious_apic - called when entering/exiting a spurious apic vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(spurious_apic); + +/* + * error_apic - called when entering/exiting an error apic vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(error_apic); + +/* + * x86_platform_ipi - called when entering/exiting a x86 platform ipi interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi); + +#ifdef CONFIG_IRQ_WORK +/* + * irq_work - called when entering/exiting a irq work interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(irq_work); + +/* + * We must dis-allow sampling irq_work_exit() because perf event sampling + * itself can cause irq_work, which would lead to an infinite loop; + * + * 1) irq_work_exit happens + * 2) generates perf sample + * 3) generates irq_work + * 4) goto 1 + */ +TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); +#endif + +/* + * The ifdef is required because that tracepoint macro hell emits tracepoint + * code in files which include this header even if the tracepoint is not + * enabled. Brilliant stuff that. + */ +#ifdef CONFIG_SMP +/* + * reschedule - called when entering/exiting a reschedule vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(reschedule); + +/* + * call_function - called when entering/exiting a call function interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(call_function); + +/* + * call_function_single - called when entering/exiting a call function + * single interrupt vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(call_function_single); +#endif + +#ifdef CONFIG_X86_MCE_THRESHOLD +/* + * threshold_apic - called when entering/exiting a threshold apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(threshold_apic); +#endif + +#ifdef CONFIG_X86_MCE_AMD +/* + * deferred_error_apic - called when entering/exiting a deferred apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); +#endif + +#ifdef CONFIG_X86_THERMAL_VECTOR +/* + * thermal_apic - called when entering/exiting a thermal apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(thermal_apic); +#endif + +TRACE_EVENT(vector_config, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int apicdest), + + TP_ARGS(irq, vector, cpu, apicdest), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, apicdest ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->apicdest = apicdest; + ), + + TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", + __entry->irq, __entry->vector, __entry->cpu, + __entry->apicdest) +); + +DECLARE_EVENT_CLASS(vector_mod, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int prev_vector, + unsigned int prev_cpu), + + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, prev_vector ) + __field( unsigned int, prev_cpu ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->prev_vector = prev_vector; + __entry->prev_cpu = prev_cpu; + + ), + + TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", + __entry->irq, __entry->vector, __entry->cpu, + __entry->prev_vector, __entry->prev_cpu) +); + +#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ +DEFINE_EVENT_FN(vector_mod, name, \ + TP_PROTO(unsigned int irq, unsigned int vector, \ + unsigned int cpu, unsigned int prev_vector, \ + unsigned int prev_cpu), \ + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); + +DECLARE_EVENT_CLASS(vector_reserve, + + TP_PROTO(unsigned int irq, int ret), + + TP_ARGS(irq, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) +); + +#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ +DEFINE_EVENT_FN(vector_reserve, name, \ + TP_PROTO(unsigned int irq, int ret), \ + TP_ARGS(irq, ret), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); + +TRACE_EVENT(vector_alloc, + + TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, + int ret), + + TP_ARGS(irq, vector, reserved, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( bool, reserved ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->reserved = reserved; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u reserved=%d ret=%d", + __entry->irq, __entry->vector, + __entry->reserved, __entry->ret) +); + +TRACE_EVENT(vector_alloc_managed, + + TP_PROTO(unsigned int irq, unsigned int vector, + int ret), + + TP_ARGS(irq, vector, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u ret=%d", + __entry->irq, __entry->vector, __entry->ret) +); + +DECLARE_EVENT_CLASS(vector_activate, + + TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, + bool reserve), + + TP_ARGS(irq, is_managed, can_reserve, reserve), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, can_reserve ) + __field( bool, reserve ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->can_reserve = can_reserve; + __entry->reserve = reserve; + ), + + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", + __entry->irq, __entry->is_managed, __entry->can_reserve, + __entry->reserve) +); + +#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ +DEFINE_EVENT_FN(vector_activate, name, \ + TP_PROTO(unsigned int irq, bool is_managed, \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); + +TRACE_EVENT(vector_teardown, + + TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), + + TP_ARGS(irq, is_managed, has_reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, has_reserved ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->has_reserved = has_reserved; + ), + + TP_printk("irq=%u is_managed=%d has_reserved=%d", + __entry->irq, __entry->is_managed, __entry->has_reserved) +); + +TRACE_EVENT(vector_setup, + + TP_PROTO(unsigned int irq, bool is_legacy, int ret), + + TP_ARGS(irq, is_legacy, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_legacy ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_legacy = is_legacy; + __entry->ret = ret; + ), + + TP_printk("irq=%u is_legacy=%d ret=%d", + __entry->irq, __entry->is_legacy, __entry->ret) +); + +TRACE_EVENT(vector_free_moved, + + TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, + bool is_managed), + + TP_ARGS(irq, cpu, vector, is_managed), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, cpu ) + __field( unsigned int, vector ) + __field( bool, is_managed ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->cpu = cpu; + __entry->vector = vector; + __entry->is_managed = is_managed; + ), + + TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", + __entry->irq, __entry->cpu, __entry->vector, + __entry->is_managed) +); + + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE irq_vectors +#endif /* _TRACE_IRQ_VECTORS_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h new file mode 100644 index 000000000..7061a5650 --- /dev/null +++ b/arch/x86/include/asm/trace_clock.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRACE_CLOCK_H +#define _ASM_X86_TRACE_CLOCK_H + +#include +#include + +#ifdef CONFIG_X86_TSC + +extern u64 notrace trace_clock_x86_tsc(void); + +# define ARCH_TRACE_CLOCKS \ + { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 }, + +#else /* !CONFIG_X86_TSC */ + +#define ARCH_TRACE_CLOCKS + +#endif + +#endif /* _ASM_X86_TRACE_CLOCK_H */ diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h new file mode 100644 index 000000000..10b1de500 --- /dev/null +++ b/arch/x86/include/asm/trap_pf.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAP_PF_H +#define _ASM_X86_TRAP_PF_H + +/* + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch + * bit 5 == 1: protection keys block access + * bit 15 == 1: SGX MMU page-fault + */ +enum x86_pf_error_code { + X86_PF_PROT = 1 << 0, + X86_PF_WRITE = 1 << 1, + X86_PF_USER = 1 << 2, + X86_PF_RSVD = 1 << 3, + X86_PF_INSTR = 1 << 4, + X86_PF_PK = 1 << 5, + X86_PF_SGX = 1 << 15, +}; + +#endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h new file mode 100644 index 000000000..f5d2325aa --- /dev/null +++ b/arch/x86/include/asm/trapnr.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPNR_H +#define _ASM_X86_TRAPNR_H + +/* Interrupts/Exceptions */ + +#define X86_TRAP_DE 0 /* Divide-by-zero */ +#define X86_TRAP_DB 1 /* Debug */ +#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */ +#define X86_TRAP_BP 3 /* Breakpoint */ +#define X86_TRAP_OF 4 /* Overflow */ +#define X86_TRAP_BR 5 /* Bound Range Exceeded */ +#define X86_TRAP_UD 6 /* Invalid Opcode */ +#define X86_TRAP_NM 7 /* Device Not Available */ +#define X86_TRAP_DF 8 /* Double Fault */ +#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */ +#define X86_TRAP_TS 10 /* Invalid TSS */ +#define X86_TRAP_NP 11 /* Segment Not Present */ +#define X86_TRAP_SS 12 /* Stack Segment Fault */ +#define X86_TRAP_GP 13 /* General Protection Fault */ +#define X86_TRAP_PF 14 /* Page Fault */ +#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */ +#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */ +#define X86_TRAP_AC 17 /* Alignment Check */ +#define X86_TRAP_MC 18 /* Machine Check */ +#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ +#define X86_TRAP_VE 20 /* Virtualization Exception */ +#define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_VC 29 /* VMM Communication Exception */ +#define X86_TRAP_IRET 32 /* IRET Exception */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h new file mode 100644 index 000000000..47ecfff2c --- /dev/null +++ b/arch/x86/include/asm/traps.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPS_H +#define _ASM_X86_TRAPS_H + +#include +#include + +#include +#include +#include /* TRAP_TRACE, ... */ +#include + +#ifdef CONFIG_X86_64 +asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); +asmlinkage __visible notrace +struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); +void __init trap_init(void); +asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); +#endif + +extern bool ibt_selftest(void); + +#ifdef CONFIG_X86_F00F_BUG +/* For handling the FOOF bug */ +void handle_invalid_op(struct pt_regs *regs); +#endif + +static inline int get_si_code(unsigned long condition) +{ + if (condition & DR_STEP) + return TRAP_TRACE; + else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) + return TRAP_HWBKPT; + else + return TRAP_BRKPT; +} + +extern int panic_on_unrecovered_nmi; + +void math_emulate(struct math_emu_info *); + +bool fault_in_kernel_space(unsigned long address); + +#ifdef CONFIG_VMAP_STACK +void __noreturn handle_stack_overflow(struct pt_regs *regs, + unsigned long fault_address, + struct stack_info *info); +#endif + +#endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h new file mode 100644 index 000000000..fbdc3d951 --- /dev/null +++ b/arch/x86/include/asm/tsc.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 TSC related functions + */ +#ifndef _ASM_X86_TSC_H +#define _ASM_X86_TSC_H + +#include +#include + +/* + * Standard way to access the cycle counter. + */ +typedef unsigned long long cycles_t; + +extern unsigned int cpu_khz; +extern unsigned int tsc_khz; + +extern void disable_TSC(void); + +static inline cycles_t get_cycles(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return 0; + return rdtsc(); +} +#define get_cycles get_cycles + +extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); + +extern void tsc_early_init(void); +extern void tsc_init(void); +extern unsigned long calibrate_delay_is_known(void); +extern void mark_tsc_unstable(char *reason); +extern int unsynchronized_tsc(void); +extern int check_tsc_unstable(void); +extern void mark_tsc_async_resets(char *reason); +extern unsigned long native_calibrate_cpu_early(void); +extern unsigned long native_calibrate_tsc(void); +extern unsigned long long native_sched_clock_from_tsc(u64 tsc); + +extern int tsc_clocksource_reliable; +#ifdef CONFIG_X86_TSC +extern bool tsc_async_resets; +#else +# define tsc_async_resets false +#endif + +/* + * Boot-time check whether the TSCs are synchronized across + * all CPUs/cores: + */ +#ifdef CONFIG_X86_TSC +extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); +extern void tsc_verify_tsc_adjust(bool resume); +extern void check_tsc_sync_source(int cpu); +extern void check_tsc_sync_target(void); +#else +static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } +static inline void tsc_verify_tsc_adjust(bool resume) { } +static inline void check_tsc_sync_source(int cpu) { } +static inline void check_tsc_sync_target(void) { } +#endif + +extern int notsc_setup(char *); +extern void tsc_save_sched_clock_state(void); +extern void tsc_restore_sched_clock_state(void); + +unsigned long cpu_khz_from_msr(void); + +#endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h new file mode 100644 index 000000000..6ca0c661c --- /dev/null +++ b/arch/x86/include/asm/uaccess.h @@ -0,0 +1,672 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UACCESS_H +#define _ASM_X86_UACCESS_H +/* + * User space memory access functions + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP +static inline bool pagefault_disabled(void); +# define WARN_ON_IN_IRQ() \ + WARN_ON_ONCE(!in_task() && !pagefault_disabled()) +#else +# define WARN_ON_IN_IRQ() +#endif + +/** + * access_ok - Checks if a user space pointer is valid + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + * + * Return: true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + */ +#define access_ok(addr, size) \ +({ \ + WARN_ON_IN_IRQ(); \ + likely(__access_ok(addr, size)); \ +}) + +#include + +extern int __get_user_1(void); +extern int __get_user_2(void); +extern int __get_user_4(void); +extern int __get_user_8(void); +extern int __get_user_nocheck_1(void); +extern int __get_user_nocheck_2(void); +extern int __get_user_nocheck_4(void); +extern int __get_user_nocheck_8(void); +extern int __get_user_bad(void); + +#define __uaccess_begin() stac() +#define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) + +/* + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') + */ +#define __inttype(x) __typeof__( \ + __typefits(x,char, \ + __typefits(x,short, \ + __typefits(x,int, \ + __typefits(x,long,0ULL))))) + +#define __typefits(x,type,not) \ + __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not) + +/* + * This is used for both get_user() and __get_user() to expand to + * the proper special function call that has odd calling conventions + * due to returning both a value and an error, and that depends on + * the size of the pointer passed in. + * + * Careful: we have to cast the result to the type of the pointer + * for sign reasons. + * + * The use of _ASM_DX as the register specifier is a bit of a + * simplification, as gcc only cares about it as the starting point + * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits + * (%ecx being the next register in gcc's x86 register sequence), and + * %rdx on 64 bits. + * + * Clang/LLVM cares about the size of the register, but still wants + * the base register for something that ends up being a pair. + */ +#define do_get_user_call(fn,x,ptr) \ +({ \ + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ + asm volatile("call __" #fn "_%P4" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ + : "0" (ptr), "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ + (x) = (__force __typeof__(*(ptr))) __val_gu; \ + __builtin_expect(__ret_gu, 0); \ +}) + +/** + * get_user - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); }) + +/** + * __get_user - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr) + + +#ifdef CONFIG_X86_32 +#define __put_user_goto_u64(x, addr, label) \ + asm_volatile_goto("\n" \ + "1: movl %%eax,0(%1)\n" \ + "2: movl %%edx,4(%1)\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ + _ASM_EXTABLE_UA(2b, %l2) \ + : : "A" (x), "r" (addr) \ + : : label) + +#else +#define __put_user_goto_u64(x, ptr, label) \ + __put_user_goto(x, ptr, "q", "er", label) +#endif + +extern void __put_user_bad(void); + +/* + * Strange magic calling convention: pointer in %ecx, + * value in %eax(:%edx), return value in %ecx. clobbers %rbx + */ +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); +extern void __put_user_nocheck_1(void); +extern void __put_user_nocheck_2(void); +extern void __put_user_nocheck_4(void); +extern void __put_user_nocheck_8(void); + +/* + * ptr must be evaluated and assigned to the temporary __ptr_pu before + * the assignment of x to __val_pu, to avoid any function calls + * involved in the ptr expression (possibly implicitly generated due + * to KASAN) from clobbering %ax. + */ +#define do_put_user_call(fn,x,ptr) \ +({ \ + int __ret_pu; \ + void __user *__ptr_pu; \ + register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ + asm volatile("call __" #fn "_%P[size]" \ + : "=c" (__ret_pu), \ + ASM_CALL_CONSTRAINT \ + : "0" (__ptr_pu), \ + "r" (__val_pu), \ + [size] "i" (sizeof(*(ptr))) \ + :"ebx"); \ + instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ + __builtin_expect(__ret_pu, 0); \ +}) + +/** + * put_user - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Return: zero on success, or -EFAULT on error. + */ +#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); }) + +/** + * __put_user - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + */ +#define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr) + +#define __put_user_size(x, ptr, size, label) \ +do { \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + switch (size) { \ + case 1: \ + __put_user_goto(__x, __ptr, "b", "iq", label); \ + break; \ + case 2: \ + __put_user_goto(__x, __ptr, "w", "ir", label); \ + break; \ + case 4: \ + __put_user_goto(__x, __ptr, "l", "ir", label); \ + break; \ + case 8: \ + __put_user_goto_u64(__x, __ptr, label); \ + break; \ + default: \ + __put_user_bad(); \ + } \ + instrument_put_user(__x, __ptr, size); \ +} while (0) + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, label) do { \ + unsigned int __gu_low, __gu_high; \ + const unsigned int __user *__gu_ptr; \ + __gu_ptr = (const void __user *)(ptr); \ + __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ + __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ + (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ +} while (0) +#else +#define __get_user_asm_u64(x, ptr, label) \ + __get_user_asm(x, ptr, "q", "=r", label) +#endif + +#define __get_user_size(x, ptr, size, label) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: { \ + unsigned char x_u8__; \ + __get_user_asm(x_u8__, ptr, "b", "=q", label); \ + (x) = x_u8__; \ + break; \ + } \ + case 2: \ + __get_user_asm(x, ptr, "w", "=r", label); \ + break; \ + case 4: \ + __get_user_asm(x, ptr, "l", "=r", label); \ + break; \ + case 8: \ + __get_user_asm_u64(x, ptr, label); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ + instrument_get_user(x); \ +} while (0) + +#define __get_user_asm(x, addr, itype, ltype, label) \ + asm_volatile_goto("\n" \ + "1: mov"itype" %[umem],%[output]\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ + : [output] ltype(x) \ + : [umem] "m" (__m(addr)) \ + : : label) + +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, retval) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + asm volatile("\n" \ + "1: movl %[lowbits],%%eax\n" \ + "2: movl %[highbits],%%edx\n" \ + "3:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + : [errout] "=r" (retval), \ + [output] "=&A"(x) \ + : [lowbits] "m" (__m(__ptr)), \ + [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ + "0" (retval)); \ +}) + +#else +#define __get_user_asm_u64(x, ptr, retval) \ + __get_user_asm(x, ptr, retval, "q") +#endif + +#define __get_user_size(x, ptr, size, retval) \ +do { \ + unsigned char x_u8__; \ + \ + retval = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm(x_u8__, ptr, retval, "b"); \ + (x) = x_u8__; \ + break; \ + case 2: \ + __get_user_asm(x, ptr, retval, "w"); \ + break; \ + case 4: \ + __get_user_asm(x, ptr, retval, "l"); \ + break; \ + case 8: \ + __get_user_asm_u64(x, ptr, retval); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype) \ + asm volatile("\n" \ + "1: mov"itype" %[umem],%[output]\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX, \ + %[errout]) \ + : [errout] "=r" (err), \ + [output] "=a" (x) \ + : [umem] "m" (__m(addr)), \ + "0" (err)) + +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + CC_SET(z) \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : CC_OUT(z) (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +/* + * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. + */ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %[result]\n\t" \ + "setz %b[result]\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[result]) \ + : [result] "=q" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT + +/* FIXME: this hack is definitely wrong -AK */ +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. + */ +#define __put_user_goto(x, addr, itype, ltype, label) \ + asm_volatile_goto("\n" \ + "1: mov"itype" %0,%1\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ + : : ltype(x), "m" (__m(addr)) \ + : : label) + +extern unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); + +extern __must_check long strnlen_user(const char __user *str, long n); + +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned len); +#define copy_mc_to_kernel copy_mc_to_kernel + +unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned len); +#endif + +/* + * movsl can be slow when source and dest are not both 8-byte aligned + */ +#ifdef CONFIG_X86_INTEL_USERCOPY +extern struct movsl_mask { + int mask; +} ____cacheline_aligned_in_smp movsl_mask; +#endif + +#define ARCH_HAS_NOCACHE_UACCESS 1 + +#ifdef CONFIG_X86_32 +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); +# include +#else +# include +#endif + +/* + * The "unsafe" user accesses aren't really "unsafe", but the naming + * is a big fat warning: you have to not only do the access_ok() + * checking before using them, but you have to surround them with the + * user_access_begin/end() pair. + */ +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + __uaccess_begin_nospec(); + return 1; +} +#define user_access_begin(a,b) user_access_begin(a,b) +#define user_access_end() __uaccess_end() + +#define user_access_save() smap_save() +#define user_access_restore(x) smap_restore(x) + +#define unsafe_put_user(x, ptr, label) \ + __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define unsafe_get_user(x, ptr, err_label) \ +do { \ + __inttype(*(ptr)) __gu_val; \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ +} while (0) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define unsafe_get_user(x, ptr, err_label) \ +do { \ + int __gu_err; \ + __inttype(*(ptr)) __gu_val; \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (unlikely(__gu_err)) goto err_label; \ +} while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_kernel_nofault(dst, src, type, err_label) \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), err_label) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __kr_err; \ + \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define __put_kernel_nofault(dst, src, type, err_label) \ + __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ + sizeof(type), err_label) + +#endif /* _ASM_X86_UACCESS_H */ + diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h new file mode 100644 index 000000000..388a40660 --- /dev/null +++ b/arch/x86/include/asm/uaccess_32.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UACCESS_32_H +#define _ASM_X86_UACCESS_32_H + +/* + * User space memory access functions + */ +#include +#include +#include + +unsigned long __must_check __copy_user_ll + (void *to, const void *from, unsigned long n); +unsigned long __must_check __copy_from_user_ll_nocache_nozero + (void *to, const void __user *from, unsigned long n); + +static __always_inline unsigned long __must_check +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return __copy_user_ll((__force void *)to, from, n); +} + +static __always_inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return __copy_user_ll(to, (__force const void *)from, n); +} + +static __always_inline unsigned long +__copy_from_user_inatomic_nocache(void *to, const void __user *from, + unsigned long n) +{ + return __copy_from_user_ll_nocache_nozero(to, from, n); +} + +#endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h new file mode 100644 index 000000000..d13d71af5 --- /dev/null +++ b/arch/x86/include/asm/uaccess_64.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UACCESS_64_H +#define _ASM_X86_UACCESS_64_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include +#include +#include + +/* + * Copy To/From Userspace + */ + +/* Handles exceptions in both to and from, but doesn't do access_ok */ +__must_check unsigned long +copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); +__must_check unsigned long +copy_user_generic_string(void *to, const void *from, unsigned len); +__must_check unsigned long +copy_user_generic_unrolled(void *to, const void *from, unsigned len); + +static __always_inline __must_check unsigned long +copy_user_generic(void *to, const void *from, unsigned len) +{ + unsigned ret; + + /* + * If CPU has ERMS feature, use copy_user_enhanced_fast_string. + * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. + * Otherwise, use copy_user_generic_unrolled. + */ + alternative_call_2(copy_user_generic_unrolled, + copy_user_generic_string, + X86_FEATURE_REP_GOOD, + copy_user_enhanced_fast_string, + X86_FEATURE_ERMS, + ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), + "=d" (len)), + "1" (to), "2" (from), "3" (len) + : "memory", "rcx", "r8", "r9", "r10", "r11"); + return ret; +} + +static __always_inline __must_check unsigned long +raw_copy_from_user(void *dst, const void __user *src, unsigned long size) +{ + return copy_user_generic(dst, (__force void *)src, size); +} + +static __always_inline __must_check unsigned long +raw_copy_to_user(void __user *dst, const void *src, unsigned long size) +{ + return copy_user_generic((__force void *)dst, src, size); +} + +extern long __copy_user_nocache(void *dst, const void __user *src, + unsigned size, int zerorest); + +extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); +extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len); + +static inline int +__copy_from_user_inatomic_nocache(void *dst, const void __user *src, + unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_nocache(dst, src, size, 0); +} + +static inline int +__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); +} + +/* + * Zero Userspace. + */ + +__must_check unsigned long +clear_user_original(void __user *addr, unsigned long len); +__must_check unsigned long +clear_user_rep_good(void __user *addr, unsigned long len); +__must_check unsigned long +clear_user_erms(void __user *addr, unsigned long len); + +static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) +{ + might_fault(); + stac(); + + /* + * No memory constraint because it doesn't change any memory gcc + * knows about. + */ + asm volatile( + "1:\n\t" + ALTERNATIVE_3("rep stosb", + "call clear_user_erms", ALT_NOT(X86_FEATURE_FSRM), + "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS), + "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT + : "a" (0) + /* rep_good clobbers %rdx */ + : "rdx"); + + clac(); + + return size; +} + +static __always_inline unsigned long clear_user(void __user *to, unsigned long n) +{ + if (access_ok(to, n)) + return __clear_user(to, n); + return n; +} +#endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h new file mode 100644 index 000000000..aeed98c3c --- /dev/null +++ b/arch/x86/include/asm/umip.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_UMIP_H +#define _ASM_X86_UMIP_H + +#include +#include + +#ifdef CONFIG_X86_UMIP +bool fixup_umip_exception(struct pt_regs *regs); +#else +static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } +#endif /* CONFIG_X86_UMIP */ +#endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h new file mode 100644 index 000000000..761173ccc --- /dev/null +++ b/arch/x86/include/asm/unistd.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNISTD_H +#define _ASM_X86_UNISTD_H 1 + +#include + + +# ifdef CONFIG_X86_32 + +# include +# define __ARCH_WANT_STAT64 +# define __ARCH_WANT_SYS_IPC +# define __ARCH_WANT_SYS_OLD_MMAP +# define __ARCH_WANT_SYS_OLD_SELECT + +# define IA32_NR_syscalls (__NR_syscalls) + +# else + +# include +# include +# include +# define __ARCH_WANT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_COMPAT_STAT +# define __ARCH_WANT_COMPAT_SYS_PREADV64 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64 +# define __ARCH_WANT_COMPAT_SYS_PREADV64V2 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +# define X32_NR_syscalls (__NR_x32_syscalls) +# define IA32_NR_syscalls (__NR_ia32_syscalls) + +# endif + +# define NR_syscalls (__NR_syscalls) + +# define __ARCH_WANT_NEW_STAT +# define __ARCH_WANT_OLD_READDIR +# define __ARCH_WANT_OLD_STAT +# define __ARCH_WANT_SYS_ALARM +# define __ARCH_WANT_SYS_FADVISE64 +# define __ARCH_WANT_SYS_GETHOSTNAME +# define __ARCH_WANT_SYS_GETPGRP +# define __ARCH_WANT_SYS_NICE +# define __ARCH_WANT_SYS_OLDUMOUNT +# define __ARCH_WANT_SYS_OLD_GETRLIMIT +# define __ARCH_WANT_SYS_OLD_UNAME +# define __ARCH_WANT_SYS_PAUSE +# define __ARCH_WANT_SYS_SIGNAL +# define __ARCH_WANT_SYS_SIGPENDING +# define __ARCH_WANT_SYS_SIGPROCMASK +# define __ARCH_WANT_SYS_SOCKETCALL +# define __ARCH_WANT_SYS_TIME32 +# define __ARCH_WANT_SYS_UTIME32 +# define __ARCH_WANT_SYS_WAITPID +# define __ARCH_WANT_SYS_FORK +# define __ARCH_WANT_SYS_VFORK +# define __ARCH_WANT_SYS_CLONE +# define __ARCH_WANT_SYS_CLONE3 + +#endif /* _ASM_X86_UNISTD_H */ diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h new file mode 100644 index 000000000..7cede4dc2 --- /dev/null +++ b/arch/x86/include/asm/unwind.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_H +#define _ASM_X86_UNWIND_H + +#include +#include +#include +#include +#include + +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) + +struct unwind_state { + struct stack_info stack_info; + unsigned long stack_mask; + struct task_struct *task; + int graph_idx; +#if defined(CONFIG_RETHOOK) + struct llist_node *kr_cur; +#endif + bool error; +#if defined(CONFIG_UNWINDER_ORC) + bool signal, full_regs; + unsigned long sp, bp, ip; + struct pt_regs *regs, *prev_regs; +#elif defined(CONFIG_UNWINDER_FRAME_POINTER) + bool got_irq; + unsigned long *bp, *orig_sp, ip; + /* + * If non-NULL: The current frame is incomplete and doesn't contain a + * valid BP. When looking for the next frame, use this instead of the + * non-existent saved BP. + */ + unsigned long *next_bp; + struct pt_regs *regs; +#else + unsigned long *sp; +#endif +}; + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame); +bool unwind_next_frame(struct unwind_state *state); +unsigned long unwind_get_return_address(struct unwind_state *state); +unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); + +static inline bool unwind_done(struct unwind_state *state) +{ + return state->stack_info.type == STACK_TYPE_UNKNOWN; +} + +static inline bool unwind_error(struct unwind_state *state) +{ + return state->error; +} + +static inline +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) +{ + first_frame = first_frame ? : get_stack_pointer(task, regs); + + __unwind_start(state, task, regs, first_frame); +} + +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) +/* + * If 'partial' returns true, only the iret frame registers are valid. + */ +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) +{ + if (unwind_done(state)) + return NULL; + + if (partial) { +#ifdef CONFIG_UNWINDER_ORC + *partial = !state->full_regs; +#else + *partial = false; +#endif + } + + return state->regs; +} +#else +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) +{ + return NULL; +} +#endif + +#ifdef CONFIG_UNWINDER_ORC +void unwind_init(void); +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size); +#else +static inline void unwind_init(void) {} +static inline +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size) {} +#endif + +static inline +unsigned long unwind_recover_rethook(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ +#ifdef CONFIG_RETHOOK + if (is_rethook_trampoline(addr)) + return rethook_find_ret_addr(state->task, (unsigned long)addr_p, + &state->kr_cur); +#endif + return addr; +} + +/* Recover the return address modified by rethook and ftrace_graph. */ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_rethook(state, ret, addr_p); +} + +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + if (task == current) \ + val = READ_ONCE(x); \ + else \ + val = READ_ONCE_NOCHECK(x); \ + val; \ +}) + +static inline bool task_on_another_cpu(struct task_struct *task) +{ +#ifdef CONFIG_SMP + return task != current && task->on_cpu; +#else + return false; +#endif +} + +#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h new file mode 100644 index 000000000..f66fbe653 --- /dev/null +++ b/arch/x86/include/asm/unwind_hints.h @@ -0,0 +1,74 @@ +#ifndef _ASM_X86_UNWIND_HINTS_H +#define _ASM_X86_UNWIND_HINTS_H + +#include + +#include "orc_types.h" + +#ifdef __ASSEMBLY__ + +.macro UNWIND_HINT_EMPTY + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_ENTRY + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 +.endm + +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 + .if \base == %rsp + .if \indirect + .set sp_reg, ORC_REG_SP_INDIRECT + .else + .set sp_reg, ORC_REG_SP + .endif + .elseif \base == %rbp + .set sp_reg, ORC_REG_BP + .elseif \base == %rdi + .set sp_reg, ORC_REG_DI + .elseif \base == %rdx + .set sp_reg, ORC_REG_DX + .elseif \base == %r10 + .set sp_reg, ORC_REG_R10 + .else + .error "UNWIND_HINT_REGS: bad base register" + .endif + + .set sp_offset, \offset + + .if \partial + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL + .elseif \extra == 0 + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL + .set sp_offset, \offset + (16*8) + .else + .set type, UNWIND_HINT_TYPE_REGS + .endif + + UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type +.endm + +.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 + UNWIND_HINT_REGS base=\base offset=\offset partial=1 +.endm + +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC +.endm + +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + +#else + +#define UNWIND_HINT_FUNC \ + UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h new file mode 100644 index 000000000..678fb546f --- /dev/null +++ b/arch/x86/include/asm/uprobes.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H +/* + * User-space Probes (UProbes) for x86 + * + * Copyright (C) IBM Corporation, 2008-2011 + * Authors: + * Srikar Dronamraju + * Jim Keniston + */ + +#include + +typedef u8 uprobe_opcode_t; + +#define MAX_UINSN_BYTES 16 +#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ + +#define UPROBE_SWBP_INSN 0xcc +#define UPROBE_SWBP_INSN_SIZE 1 + +struct uprobe_xol_ops; + +struct arch_uprobe { + union { + u8 insn[MAX_UINSN_BYTES]; + u8 ixol[MAX_UINSN_BYTES]; + }; + + const struct uprobe_xol_ops *ops; + + union { + struct { + s32 offs; + u8 ilen; + u8 opc1; + } branch; + struct { + u8 fixups; + u8 ilen; + } defparam; + struct { + u8 reg_offset; /* to the start of pt_regs */ + u8 ilen; + } push; + }; +}; + +struct arch_uprobe_task { +#ifdef CONFIG_X86_64 + unsigned long saved_scratch_register; +#endif + unsigned int saved_trap_nr; + unsigned int saved_tf; +}; + +#endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h new file mode 100644 index 000000000..413c91746 --- /dev/null +++ b/arch/x86/include/asm/user.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_USER_H +#define _ASM_X86_USER_H + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#include + +struct user_ymmh_regs { + /* 16 * 16 bytes for each YMMH-reg */ + __u32 ymmh_space[64]; +}; + +struct user_xstate_header { + __u64 xfeatures; + __u64 reserved1[2]; + __u64 reserved2[5]; +}; + +/* + * The structure layout of user_xstateregs, used for exporting the + * extended register state through ptrace and core-dump (NT_X86_XSTATE note) + * interfaces will be same as the memory layout of xsave used by the processor + * (except for the bytes 464..511, which can be used by the software) and hence + * the size of this structure varies depending on the features supported by the + * processor and OS. The size of the structure that users need to use can be + * obtained by doing: + * cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx); + * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that user (debuggers, etc.) + * need to use. + * + * For now, only the first 8 bytes of the software usable bytes[464..471] will + * be used and will be set to OS enabled xstate mask (which is same as the + * 64bit mask returned by the xgetbv's xCR0). Users (analyzing core dump + * remotely, etc.) can use this mask as well as the mask saved in the + * xstate_hdr bytes and interpret what states the processor/OS supports + * and what states are in modified/initialized conditions for the + * particular process/thread. + * + * Also when the user modifies certain state FP/SSE/etc through the + * ptrace interface, they must ensure that the header.xfeatures + * bytes[512..519] of the memory layout are updated correspondingly. + * i.e., for example when FP state is modified to a non-init state, + * header.xfeatures's bit 0 must be set to '1', when SSE is modified to + * non-init state, header.xfeatures's bit 1 must to be set to '1', etc. + */ +#define USER_XSTATE_FX_SW_WORDS 6 +#define USER_XSTATE_XCR0_WORD 0 + +struct user_xstateregs { + struct { + __u64 fpx_space[58]; + __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; + } i387; + struct user_xstate_header header; + struct user_ymmh_regs ymmh; + /* further processor state extensions go here */ +}; + +#endif /* _ASM_X86_USER_H */ diff --git a/arch/x86/include/asm/user32.h b/arch/x86/include/asm/user32.h new file mode 100644 index 000000000..fa577312f --- /dev/null +++ b/arch/x86/include/asm/user32.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_USER32_H +#define _ASM_X86_USER32_H + +/* IA32 compatible user structures for ptrace. + * These should be used for 32bit coredumps too. */ + +struct user_i387_ia32_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ +}; + +/* FSAVE frame with extensions */ +struct user32_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; /* not compatible to 64bit twd */ + unsigned short fop; + int fip; + int fcs; + int foo; + int fos; + int mxcsr; + int reserved; + int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + int xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + int padding[56]; +}; + +struct user_regs_struct32 { + __u32 ebx, ecx, edx, esi, edi, ebp, eax; + unsigned short ds, __ds, es, __es; + unsigned short fs, __fs, gs, __gs; + __u32 orig_eax, eip; + unsigned short cs, __cs; + __u32 eflags, esp; + unsigned short ss, __ss; +}; + +struct user32 { + struct user_regs_struct32 regs; /* Where the registers are actually stored */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + struct user_i387_ia32_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + __u32 u_tsize; /* Text segment size (pages). */ + __u32 u_dsize; /* Data segment size (pages). */ + __u32 u_ssize; /* Stack segment size (pages). */ + __u32 start_code; /* Starting virtual address of text. */ + __u32 start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + __u32 signal; /* Signal that caused the core dump. */ + int reserved; /* No __u32er used */ + __u32 u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + __u32 u_fpstate; /* Math Co-processor pointer. */ + __u32 magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + int u_debugreg[8]; +}; + + +#endif /* _ASM_X86_USER32_H */ diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h new file mode 100644 index 000000000..8963915e5 --- /dev/null +++ b/arch/x86/include/asm/user_32.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_USER_32_H +#define _ASM_X86_USER_32_H + +#include +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user (under + linux we use the 'trad-core' bfd). There are quite a number of + obstacles to being able to view the contents of the floating point + registers, and until these are solved you will not be able to view the + contents of them. Actually, you can read in the core file and look at + the contents of the user struct to find out what the floating point + registers contain. + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. +*/ + +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. + */ + +struct user_i387_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ +}; + +struct user_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +}; + +/* + * This is the old layout of "struct pt_regs", and + * is still the layout used by user mode (the new + * pt_regs doesn't have all registers as the kernel + * doesn't use the extra segment registers) + */ +struct user_regs_struct { + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +}; + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ +struct user{ +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* ptrace does not yet supply these. Someday.... */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + struct user_i387_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + int reserved; /* No longer used */ + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + struct user_i387_struct *u_fpstate; /* Math Co-processor pointer. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + int u_debugreg[8]; +}; + +#endif /* _ASM_X86_USER_32_H */ diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h new file mode 100644 index 000000000..1dd10f07c --- /dev/null +++ b/arch/x86/include/asm/user_64.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_USER_64_H +#define _ASM_X86_USER_64_H + +#include +#include +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user. + There are quite a number of obstacles to being able to view the + contents of the floating point registers, and until these are + solved you will not be able to view the contents of them. + Actually, you can read in the core file and look at the contents of + the user struct to find out what the floating point registers + contain. + + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. */ + +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. + * + * x86-64 support by Andi Kleen. + */ + +/* This matches the 64bit FXSAVE format as defined by AMD. It is the same + as the 32bit format defined by Intel, except that the selector:offset pairs + for data and eip are replaced with flat 64bit pointers. */ +struct user_i387_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; /* Note this is not the same as + the 32bit/x87/FSAVE twd */ + unsigned short fop; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + __u32 padding[24]; +}; + +/* + * Segment register layout in coredumps. + */ +struct user_regs_struct { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; + unsigned long fs_base; + unsigned long gs_base; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; +}; + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ + +struct user { +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* ptrace does not yet supply these. Someday.... */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + int pad0; + struct user_i387_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + int reserved; /* No longer used */ + int pad1; + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + struct user_i387_struct *u_fpstate; /* Math Co-processor pointer. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + unsigned long u_debugreg[8]; + unsigned long error_code; /* CPU error code or 0 */ + unsigned long fault_address; /* CR3 or 0 */ +}; + +#endif /* _ASM_X86_USER_64_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h new file mode 100644 index 000000000..1b6455f88 --- /dev/null +++ b/arch/x86/include/asm/uv/bios.h @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_UV_BIOS_H +#define _ASM_X86_UV_BIOS_H + +/* + * UV BIOS layer definitions. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) Russ Anderson + */ + +#include + +/* + * Values for the BIOS calls. It is passed as the first * argument in the + * BIOS call. Passing any other value in the first argument will result + * in a BIOS_STATUS_UNIMPLEMENTED return status. + */ +enum uv_bios_cmd { + UV_BIOS_COMMON, + UV_BIOS_GET_SN_INFO, + UV_BIOS_FREQ_BASE, + UV_BIOS_WATCHLIST_ALLOC, + UV_BIOS_WATCHLIST_FREE, + UV_BIOS_MEMPROTECT, + UV_BIOS_GET_PARTITION_ADDR, + UV_BIOS_SET_LEGACY_VGA_TARGET +}; + +#define UV_BIOS_EXTRA 0x10000 +#define UV_BIOS_GET_PCI_TOPOLOGY 0x10001 +#define UV_BIOS_GET_GEOINFO 0x10003 + +#define UV_BIOS_EXTRA_OP_MEM_COPYIN 0x1000 +#define UV_BIOS_EXTRA_OP_MEM_COPYOUT 0x2000 +#define UV_BIOS_EXTRA_OP_MASK 0x0fff +#define UV_BIOS_EXTRA_GET_HEAPSIZE 1 +#define UV_BIOS_EXTRA_INSTALL_HEAP 2 +#define UV_BIOS_EXTRA_MASTER_NASID 3 +#define UV_BIOS_EXTRA_OBJECT_COUNT (10|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_OBJECTS (12|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_PORTS (13|UV_BIOS_EXTRA_OP_MEM_COPYOUT) + +/* + * Status values returned from a BIOS call. + */ +enum { + BIOS_STATUS_MORE_PASSES = 1, + BIOS_STATUS_SUCCESS = 0, + BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, + BIOS_STATUS_EINVAL = -EINVAL, + BIOS_STATUS_UNAVAIL = -EBUSY, + BIOS_STATUS_ABORT = -EINTR, +}; + +/* Address map parameters */ +struct uv_gam_parameters { + u64 mmr_base; + u64 gru_base; + u8 mmr_shift; /* Convert PNode to MMR space offset */ + u8 gru_shift; /* Convert PNode to GRU space offset */ + u8 gpa_shift; /* Size of offset field in GRU phys addr */ + u8 unused1; +}; + +/* UV_TABLE_GAM_RANGE_ENTRY values */ +#define UV_GAM_RANGE_TYPE_UNUSED 0 /* End of table */ +#define UV_GAM_RANGE_TYPE_RAM 1 /* Normal RAM */ +#define UV_GAM_RANGE_TYPE_NVRAM 2 /* Non-volatile memory */ +#define UV_GAM_RANGE_TYPE_NV_WINDOW 3 /* NVMDIMM block window */ +#define UV_GAM_RANGE_TYPE_NV_MAILBOX 4 /* NVMDIMM mailbox */ +#define UV_GAM_RANGE_TYPE_HOLE 5 /* Unused address range */ +#define UV_GAM_RANGE_TYPE_MAX 6 + +/* The structure stores PA bits 56:26, for 64MB granularity */ +#define UV_GAM_RANGE_SHFT 26 /* 64MB */ + +struct uv_gam_range_entry { + char type; /* Entry type: GAM_RANGE_TYPE_UNUSED, etc. */ + char unused1; + u16 nasid; /* HNasid */ + u16 sockid; /* Socket ID, high bits of APIC ID */ + u16 pnode; /* Index to MMR and GRU spaces */ + u32 unused2; + u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */ +}; + +#define UV_AT_SIZE 8 /* 7 character arch type + NULL char */ +struct uv_arch_type_entry { + char archtype[UV_AT_SIZE]; +}; + +#define UV_SYSTAB_SIG "UVST" +#define UV_SYSTAB_VERSION_1 1 /* UV2/3 BIOS version */ +#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */ +#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */ +#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */ +#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */ +#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3 + +#define UV_SYSTAB_VERSION_UV5 0x500 /* UV5 GAM base version */ +#define UV_SYSTAB_VERSION_UV5_LATEST UV_SYSTAB_VERSION_UV5 + +#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */ +#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */ +#define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */ +#define UV_SYSTAB_TYPE_ARCH_TYPE 3 /* UV arch type */ +#define UV_SYSTAB_TYPE_MAX 4 + +/* + * The UV system table describes specific firmware + * capabilities available to the Linux kernel at runtime. + */ +struct uv_systab { + char signature[4]; /* must be UV_SYSTAB_SIG */ + u32 revision; /* distinguish different firmware revs */ + u64 function; /* BIOS runtime callback function ptr */ + u32 size; /* systab size (starting with _VERSION_UV4) */ + struct { + u32 type:8; /* type of entry */ + u32 offset:24; /* byte offset from struct start to entry */ + } entry[1]; /* additional entries follow */ +}; +extern struct uv_systab *uv_systab; + +#define UV_BIOS_MAXSTRING 128 +struct uv_bios_hub_info { + unsigned int id; + union { + struct { + unsigned long long this_part:1; + unsigned long long is_shared:1; + unsigned long long is_disabled:1; + } fields; + struct { + unsigned long long flags; + unsigned long long reserved; + } b; + } f; + char name[UV_BIOS_MAXSTRING]; + char location[UV_BIOS_MAXSTRING]; + unsigned int ports; +}; + +struct uv_bios_port_info { + unsigned int port; + unsigned int conn_id; + unsigned int conn_port; +}; + +/* (... end of definitions from UV BIOS ...) */ + +enum { + BIOS_FREQ_BASE_PLATFORM = 0, + BIOS_FREQ_BASE_INTERVAL_TIMER = 1, + BIOS_FREQ_BASE_REALTIME_CLOCK = 2 +}; + +union partition_info_u { + u64 val; + struct { + u64 hub_version : 8, + partition_id : 16, + coherence_id : 16, + region_size : 24; + }; +}; + +enum uv_memprotect { + UV_MEMPROT_RESTRICT_ACCESS, + UV_MEMPROT_ALLOW_AMO, + UV_MEMPROT_ALLOW_RW +}; + +extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); +extern s64 uv_bios_freq_base(u64, u64 *); +extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, + unsigned long *); +extern int uv_bios_mq_watchlist_free(int, int); +extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); +extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); +extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); + +extern s64 uv_bios_get_master_nasid(u64 sz, u64 *nasid); +extern s64 uv_bios_get_heapsize(u64 nasid, u64 sz, u64 *heap_sz); +extern s64 uv_bios_install_heap(u64 nasid, u64 sz, u64 *heap); +extern s64 uv_bios_obj_count(u64 nasid, u64 sz, u64 *objcnt); +extern s64 uv_bios_enum_objs(u64 nasid, u64 sz, u64 *objbuf); +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 sz, u64 *portbuf); +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 sz, u64 *geo); +extern s64 uv_bios_get_pci_topology(u64 sz, u64 *buf); + +extern int uv_bios_init(void); +extern unsigned long get_uv_systab_phys(bool msg); + +extern unsigned long sn_rtc_cycles_per_second; +extern int uv_type; +extern long sn_partition_id; +extern long sn_coherency_id; +extern long sn_region_size; +extern long system_serial_number; +extern ssize_t uv_get_archtype(char *buf, int len); +extern int uv_get_hubless_system(void); + +extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ + +/* + * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details + */ +extern struct semaphore __efi_uv_runtime_lock; + +#endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h new file mode 100644 index 000000000..648eb23fe --- /dev/null +++ b/arch/x86/include/asm/uv/uv.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UV_UV_H +#define _ASM_X86_UV_UV_H + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; + +#ifdef CONFIG_X86_UV +#include + +#define UV_PROC_NODE "sgi_uv" + +static inline int uv(int uvtype) +{ + /* uv(0) is "any" */ + if (uvtype >= 0 && uvtype <= 30) + return 1 << uvtype; + return 1; +} + +extern unsigned long uv_systab_phys; + +extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; +} +extern int is_uv_system(void); +extern int is_uv_hubbed(int uvtype); +extern void uv_cpu_init(void); +extern void uv_nmi_init(void); +extern void uv_system_init(void); + +#else /* !X86_UV */ + +static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } +static inline int is_uv_system(void) { return 0; } +static inline int is_uv_hubbed(int uv) { return 0; } +static inline void uv_cpu_init(void) { } +static inline void uv_system_init(void) { } + +#endif /* X86_UV */ + +#endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h new file mode 100644 index 000000000..027a9258d --- /dev/null +++ b/arch/x86/include/asm/uv/uv_geo.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Hewlett Packard Enterprise Development LP. All rights reserved. + */ + +#ifndef _ASM_UV_GEO_H +#define _ASM_UV_GEO_H + +/* Type declarations */ + +/* Size of a geoid_s structure (must be before decl. of geoid_u) */ +#define GEOID_SIZE 8 + +/* Fields common to all substructures */ +struct geo_common_s { + unsigned char type; /* What type of h/w is named by this geoid_s */ + unsigned char blade; + unsigned char slot; /* slot is IRU */ + unsigned char upos; + unsigned char rack; +}; + +/* Additional fields for particular types of hardware */ +struct geo_node_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_rtr_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_iocntl_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_pcicard_s { + struct geo_iocntl_s common; + char bus; /* Bus/widget number */ + char slot; /* PCI slot number */ +}; + +/* Subcomponents of a node */ +struct geo_cpu_s { + struct geo_node_s node; + unsigned char socket:4, /* Which CPU on the node */ + thread:4; + unsigned char core; +}; + +struct geo_mem_s { + struct geo_node_s node; + char membus; /* The memory bus on the node */ + char memslot; /* The memory slot on the bus */ +}; + +union geoid_u { + struct geo_common_s common; + struct geo_node_s node; + struct geo_iocntl_s iocntl; + struct geo_pcicard_s pcicard; + struct geo_rtr_s rtr; + struct geo_cpu_s cpu; + struct geo_mem_s mem; + char padsize[GEOID_SIZE]; +}; + +/* Defined constants */ + +#define GEO_MAX_LEN 48 + +#define GEO_TYPE_INVALID 0 +#define GEO_TYPE_MODULE 1 +#define GEO_TYPE_NODE 2 +#define GEO_TYPE_RTR 3 +#define GEO_TYPE_IOCNTL 4 +#define GEO_TYPE_IOCARD 5 +#define GEO_TYPE_CPU 6 +#define GEO_TYPE_MEM 7 +#define GEO_TYPE_MAX (GEO_TYPE_MEM+1) + +static inline int geo_rack(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.rack; +} + +static inline int geo_slot(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.upos; +} + +static inline int geo_blade(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.blade * 2 + g.common.slot; +} + +#endif /* _ASM_UV_GEO_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h new file mode 100644 index 000000000..d3e319791 --- /dev/null +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -0,0 +1,779 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV architectural definitions + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_HUB_H +#define _ASM_X86_UV_UV_HUB_H + +#ifdef CONFIG_X86_64 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Addressing Terminology + * + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. + * + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * GPA - (global physical address) a socket physical address converted + * so that it can be used by the GRU as a global address. Socket + * physical addresses 1) need additional NASID (node) bits added + * to the high end of the address, and 2) unaliased if the + * partition does not have a physical address 0. In addition, on + * UV2 rev 1, GPAs need the gnode left shifted to bits 39 or 40. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * + * + * Memory/UV-HUB Processor Socket Address Format: + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) + * + * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). + * The actual values are configuration dependent and are set at + * boot time. M & N values are set by the hardware/BIOS at boot. + * + * + * APICID format + * NOTE!!!!!! This is the current format of the APICID. However, code + * should assume that this will change in the future. Use functions + * in this file for all APICID bit manipulations and conversion. + * + * 1111110000000000 + * 5432109876543210 + * pppppppppplc0cch Nehalem-EX (12 bits in hdw reg) + * ppppppppplcc0cch Westmere-EX (12 bits in hdw reg) + * pppppppppppcccch SandyBridge (15 bits in hdw reg) + * sssssssssss + * + * p = pnode bits + * l = socket number on board + * c = core + * h = hyperthread + * s = bits that are in the SOCKET_ID CSR + * + * Note: Processor may support fewer bits in the APICID register. The ACPI + * tables hold all 16 bits. Software needs to be aware of this. + * + * Unless otherwise specified, all references to APICID refer to + * the FULL value contained in ACPI tables, not the subset in the + * processor APICID register. + */ + +/* + * Maximum number of bricks in all partitions and in all coherency domains. + * This is the total number of bricks accessible in the numalink fabric. It + * includes all C & M bricks. Routers are NOT included. + * + * This value is also the value of the maximum number of non-router NASIDs + * in the numalink fabric. + * + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. + */ +#define UV_MAX_NUMALINK_BLADES 16384 + +/* + * Maximum number of C/Mbricks within a software SSI (hardware may support + * more). + */ +#define UV_MAX_SSI_BLADES 256 + +/* + * The largest possible NASID of a C or M brick (+ 2) + */ +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) + +/* GAM (globally addressed memory) range table */ +struct uv_gam_range_s { + u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */ + u16 nasid; /* node's global physical address */ + s8 base; /* entry index of node's base addr */ + u8 reserved; +}; + +/* + * The following defines attributes of the HUB chip. These attributes are + * frequently referenced and are kept in a common per hub struct. + * After setup, the struct is read only, so it should be readily + * available in the L3 cache on the cpu socket for the node. + */ +struct uv_hub_info_s { + unsigned int hub_type; + unsigned char hub_revision; + unsigned long global_mmr_base; + unsigned long global_mmr_shift; + unsigned long gpa_mask; + unsigned short *socket_to_node; + unsigned short *socket_to_pnode; + unsigned short *pnode_to_socket; + struct uv_gam_range_s *gr_table; + unsigned short min_socket; + unsigned short min_pnode; + unsigned char m_val; + unsigned char n_val; + unsigned char gr_table_len; + unsigned char apic_pnode_shift; + unsigned char gpa_shift; + unsigned char nasid_shift; + unsigned char m_shift; + unsigned char n_lshift; + unsigned int gnode_extra; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned long global_gru_base; + unsigned long global_gru_shift; + unsigned short pnode; + unsigned short pnode_mask; + unsigned short coherency_domain_number; + unsigned short numa_blade_id; + unsigned short nr_possible_cpus; + unsigned short nr_online_cpus; + short memory_nid; +}; + +/* CPU specific info with a pointer to the hub common info struct */ +struct uv_cpu_info_s { + void *p_uv_hub_info; + unsigned char blade_cpu_id; + void *reserved; +}; +DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info); + +#define uv_cpu_info this_cpu_ptr(&__uv_cpu_info) +#define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu)) + +/* Node specific hub common info struct */ +extern void **__uv_hub_info_list; +static inline struct uv_hub_info_s *uv_hub_info_list(int node) +{ + return (struct uv_hub_info_s *)__uv_hub_info_list[node]; +} + +static inline struct uv_hub_info_s *_uv_hub_info(void) +{ + return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info; +} +#define uv_hub_info _uv_hub_info() + +static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) +{ + return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; +} + +static inline int uv_hub_type(void) +{ + return uv_hub_info->hub_type; +} + +static inline __init void uv_hub_type_set(int uvmask) +{ + uv_hub_info->hub_type = uvmask; +} + + +/* + * HUB revision ranges for each UV HUB architecture. + * This is a software convention - NOT the hardware revision numbers in + * the hub chip. + */ +#define UV2_HUB_REVISION_BASE 3 +#define UV3_HUB_REVISION_BASE 5 +#define UV4_HUB_REVISION_BASE 7 +#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ +#define UV5_HUB_REVISION_BASE 9 + +static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; } +static inline int is_uv1_hub(void) { return 0; } +static inline int is_uv2_hub(void) { return is_uv(UV2); } +static inline int is_uv3_hub(void) { return is_uv(UV3); } +static inline int is_uv4a_hub(void) { return is_uv(UV4A); } +static inline int is_uv4_hub(void) { return is_uv(UV4); } +static inline int is_uv5_hub(void) { return is_uv(UV5); } + +/* + * UV4A is a revision of UV4. So on UV4A, both is_uv4_hub() and + * is_uv4a_hub() return true, While on UV4, only is_uv4_hub() + * returns true. So to get true results, first test if is UV4A, + * then test if is UV4. + */ + +/* UVX class: UV2,3,4 */ +static inline int is_uvx_hub(void) { return is_uv(UVX); } + +/* UVY class: UV5,..? */ +static inline int is_uvy_hub(void) { return is_uv(UVY); } + +/* Any UV Hubbed System */ +static inline int is_uv_hub(void) { return is_uv(UV_ANY); } + +union uvh_apicid { + unsigned long v; + struct uvh_apicid_s { + unsigned long local_apic_mask : 24; + unsigned long local_apic_shift : 5; + unsigned long unused1 : 3; + unsigned long pnode_mask : 24; + unsigned long pnode_shift : 5; + unsigned long unused2 : 3; + } s; +}; + +/* + * Local & Global MMR space macros. + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) + */ +#define UV_NASID_TO_PNODE(n) \ + (((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) +#define UV_PNODE_TO_NASID(p) \ + (UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift) + +#define UV2_LOCAL_MMR_BASE 0xfa000000UL +#define UV2_GLOBAL_MMR32_BASE 0xfc000000UL +#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) + +#define UV3_LOCAL_MMR_BASE 0xfa000000UL +#define UV3_GLOBAL_MMR32_BASE 0xfc000000UL +#define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) + +#define UV4_LOCAL_MMR_BASE 0xfa000000UL +#define UV4_GLOBAL_MMR32_BASE 0 +#define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV4_GLOBAL_MMR32_SIZE 0 + +#define UV5_LOCAL_MMR_BASE 0xfa000000UL +#define UV5_GLOBAL_MMR32_BASE 0 +#define UV5_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV5_GLOBAL_MMR32_SIZE 0 + +#define UV_LOCAL_MMR_BASE ( \ + is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \ + is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \ + is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \ + is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \ + 0) + +#define UV_GLOBAL_MMR32_BASE ( \ + is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \ + is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \ + is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \ + is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \ + 0) + +#define UV_LOCAL_MMR_SIZE ( \ + is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \ + is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \ + is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \ + is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \ + 0) + +#define UV_GLOBAL_MMR32_SIZE ( \ + is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \ + is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \ + is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \ + is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \ + 0) + +#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) + +#define UV_GLOBAL_GRU_MMR_BASE 0x4000000 + +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define _UV_GLOBAL_MMR64_PNODE_SHIFT 26 +#define UV_GLOBAL_MMR64_PNODE_SHIFT (uv_hub_info->global_mmr_shift) + +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) + +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +#define UVH_APICID 0x002D0E00L +#define UV_APIC_PNODE_SHIFT 6 + +/* Local Bus from cpu's perspective */ +#define LOCAL_BUS_BASE 0x1c00000 +#define LOCAL_BUS_SIZE (4 * 1024 * 1024) + +/* + * System Controller Interface Reg + * + * Note there are NO leds on a UV system. This register is only + * used by the system controller to monitor system-wide operation. + * There are 64 regs per node. With Nehalem cpus (2 cores per node, + * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on + * a node. + * + * The window is located at top of ACPI MMR space + */ +#define SCIR_WINDOW_COUNT 64 +#define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \ + LOCAL_BUS_SIZE - \ + SCIR_WINDOW_COUNT) + +#define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */ +#define SCIR_CPU_ACTIVITY 0x02 /* not idle */ +#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */ + +/* Loop through all installed blades */ +#define for_each_possible_blade(bid) \ + for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++) + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* global bits offset - number of local address bits in gpa for this UV arch */ +static inline unsigned int uv_gpa_shift(void) +{ + return uv_hub_info->gpa_shift; +} +#define _uv_gpa_shift + +/* Find node that has the address range that contains global address */ +static inline struct uv_gam_range_s *uv_gam_range(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_hub_info->gr_table; + unsigned long pal = (pa & uv_hub_info->gpa_mask) >> UV_GAM_RANGE_SHFT; + int i, num = uv_hub_info->gr_table_len; + + if (gr) { + for (i = 0; i < num; i++, gr++) { + if (pal < gr->limit) + return gr; + } + } + pr_crit("UV: GAM Range for 0x%lx not found at %p!\n", pa, gr); + BUG(); +} + +/* Return base address of node that contains global address */ +static inline unsigned long uv_gam_range_base(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_gam_range(pa); + int base = gr->base; + + if (base < 0) + return 0UL; + + return uv_hub_info->gr_table[base].limit; +} + +/* socket phys RAM --> UV global NASID (UV4+) */ +static inline unsigned long uv_soc_phys_ram_to_nasid(unsigned long paddr) +{ + return uv_gam_range(paddr)->nasid; +} +#define _uv_soc_phys_ram_to_nasid + +/* socket virtual --> UV global NASID (UV4+) */ +static inline unsigned long uv_gpa_nasid(void *v) +{ + return uv_soc_phys_ram_to_nasid(__pa(v)); +} + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + unsigned int m_val = uv_hub_info->m_val; + + if (paddr < uv_hub_info->lowmem_remap_top) + paddr |= uv_hub_info->lowmem_remap_base; + + if (m_val) { + paddr |= uv_hub_info->gnode_upper; + paddr = ((paddr << uv_hub_info->m_shift) + >> uv_hub_info->m_shift) | + ((paddr >> uv_hub_info->m_val) + << uv_hub_info->n_lshift); + } else { + paddr |= uv_soc_phys_ram_to_nasid(paddr) + << uv_hub_info->gpa_shift; + } + return paddr; +} + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return uv_soc_phys_ram_to_gpa(__pa(v)); +} + +/* Top two bits indicate the requested address is in MMR space. */ +static inline int +uv_gpa_in_mmr_space(unsigned long gpa) +{ + return (gpa >> 62) == 0x3UL; +} + +/* UV global physical address --> socket phys RAM */ +static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) +{ + unsigned long paddr; + unsigned long remap_base = uv_hub_info->lowmem_remap_base; + unsigned long remap_top = uv_hub_info->lowmem_remap_top; + unsigned int m_val = uv_hub_info->m_val; + + if (m_val) + gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | + ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); + + paddr = gpa & uv_hub_info->gpa_mask; + if (paddr >= remap_base && paddr < remap_base + remap_top) + paddr -= remap_base; + return paddr; +} + +/* gpa -> gnode */ +static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) +{ + unsigned int n_lshift = uv_hub_info->n_lshift; + + if (n_lshift) + return gpa >> n_lshift; + + return uv_gam_range(gpa)->nasid >> 1; +} + +/* gpa -> pnode */ +static inline int uv_gpa_to_pnode(unsigned long gpa) +{ + return uv_gpa_to_gnode(gpa) & uv_hub_info->pnode_mask; +} + +/* gpa -> node offset */ +static inline unsigned long uv_gpa_to_offset(unsigned long gpa) +{ + unsigned int m_shift = uv_hub_info->m_shift; + + if (m_shift) + return (gpa << m_shift) >> m_shift; + + return (gpa & uv_hub_info->gpa_mask) - uv_gam_range_base(gpa); +} + +/* Convert socket to node */ +static inline int _uv_socket_to_node(int socket, unsigned short *s2nid) +{ + return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket; +} + +static inline int uv_socket_to_node(int socket) +{ + return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + unsigned int m_val = uv_hub_info->m_val; + unsigned long base; + unsigned short sockid, node, *p2s; + + if (m_val) + return __va(((unsigned long)pnode << m_val) | offset); + + p2s = uv_hub_info->pnode_to_socket; + sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; + node = uv_socket_to_node(sockid); + + /* limit address of previous socket is our base, except node 0 is 0 */ + if (!node) + return __va((unsigned long)offset); + + base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); + return __va(base << UV_GAM_RANGE_SHFT | offset); +} + +/* Extract/Convert a PNODE from an APICID (full apicid, not processor subset) */ +static inline int uv_apicid_to_pnode(int apicid) +{ + int pnode = apicid >> uv_hub_info->apic_pnode_shift; + unsigned short *s2pn = uv_hub_info->socket_to_pnode; + + return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode; +} + +/* + * Access global MMRs using the low memory MMR32 space. This region supports + * faster MMR access but not all MMRs are accessible in this space. + */ +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) +{ + return __va(UV_GLOBAL_MMR32_BASE | + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) +{ + writeq(val, uv_global_mmr32_address(pnode, offset)); +} + +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) +{ + return readq(uv_global_mmr32_address(pnode, offset)); +} + +/* + * Access Global MMR space using the MMR space located at the top of physical + * memory. + */ +static inline volatile void __iomem *uv_global_mmr64_address(int pnode, unsigned long offset) +{ + return __va(UV_GLOBAL_MMR64_BASE | + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) +{ + writeq(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) +{ + return readq(uv_global_mmr64_address(pnode, offset)); +} + +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) +{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + +/* + * Access hub local MMRs. Faster than using global space but only local MMRs + * are accessible. + */ +static inline unsigned long *uv_local_mmr_address(unsigned long offset) +{ + return __va(UV_LOCAL_MMR_BASE | offset); +} + +static inline unsigned long uv_read_local_mmr(unsigned long offset) +{ + return readq(uv_local_mmr_address(offset)); +} + +static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) +{ + writeq(val, uv_local_mmr_address(offset)); +} + +static inline unsigned char uv_read_local_mmr8(unsigned long offset) +{ + return readb(uv_local_mmr_address(offset)); +} + +static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) +{ + writeb(val, uv_local_mmr_address(offset)); +} + +/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_blade_processor_id(void) +{ + return uv_cpu_info->blade_cpu_id; +} + +/* Blade-local cpu number of cpu N. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_cpu_blade_processor_id(int cpu) +{ + return uv_cpu_info_per(cpu)->blade_cpu_id; +} + +/* Blade number to Node number (UV2..UV4 is 1:1) */ +static inline int uv_blade_to_node(int blade) +{ + return blade; +} + +/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ +static inline int uv_numa_blade_id(void) +{ + return uv_hub_info->numa_blade_id; +} + +/* + * Convert linux node number to the UV blade number. + * .. Currently for UV2 thru UV4 the node and the blade are identical. + * .. If this changes then you MUST check references to this function! + */ +static inline int uv_node_to_blade_id(int nid) +{ + return nid; +} + +/* Convert a CPU number to the UV blade number */ +static inline int uv_cpu_to_blade_id(int cpu) +{ + return uv_node_to_blade_id(cpu_to_node(cpu)); +} + +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) +{ + return uv_hub_info_list(uv_blade_to_node(bid))->pnode; +} + +/* Nid of memory node on blade. -1 if no blade-local memory */ +static inline int uv_blade_to_memory_nid(int bid) +{ + return uv_hub_info_list(uv_blade_to_node(bid))->memory_nid; +} + +/* Determine the number of possible cpus on a blade */ +static inline int uv_blade_nr_possible_cpus(int bid) +{ + return uv_hub_info_list(uv_blade_to_node(bid))->nr_possible_cpus; +} + +/* Determine the number of online cpus on a blade */ +static inline int uv_blade_nr_online_cpus(int bid) +{ + return uv_hub_info_list(uv_blade_to_node(bid))->nr_online_cpus; +} + +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) +{ + return uv_cpu_hub_info(cpu)->pnode; +} + +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) +{ + return uv_hub_info_list(nid)->pnode; +} + +/* Maximum possible number of blades */ +extern short uv_possible_blades; +static inline int uv_num_possible_blades(void) +{ + return uv_possible_blades; +} + +/* Per Hub NMI support */ +extern void uv_nmi_setup(void); +extern void uv_nmi_setup_hubless(void); + +/* BIOS/Kernel flags exchange MMR */ +#define UVH_BIOS_KERNEL_MMR UVH_SCRATCH5 +#define UVH_BIOS_KERNEL_MMR_ALIAS UVH_SCRATCH5_ALIAS +#define UVH_BIOS_KERNEL_MMR_ALIAS_2 UVH_SCRATCH5_ALIAS_2 + +/* TSC sync valid, set by BIOS */ +#define UVH_TSC_SYNC_MMR UVH_BIOS_KERNEL_MMR +#define UVH_TSC_SYNC_SHIFT 10 +#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */ +#define UVH_TSC_SYNC_MASK 3 /* 0011 */ +#define UVH_TSC_SYNC_VALID 3 /* 0011 */ +#define UVH_TSC_SYNC_UNKNOWN 0 /* 0000 */ + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR +#define UVH_NMI_MMR_CLEAR UVH_BIOS_KERNEL_MMR_ALIAS +#define UVH_NMI_MMR_SHIFT 63 +#define UVH_NMI_MMR_TYPE "SCRATCH5" + +struct uv_hub_nmi_s { + raw_spinlock_t nmi_lock; + atomic_t in_nmi; /* flag this node in UV NMI IRQ */ + atomic_t cpu_owner; /* last locker of this struct */ + atomic_t read_mmr_count; /* count of MMR reads */ + atomic_t nmi_count; /* count of true UV NMIs */ + unsigned long nmi_value; /* last value read from NMI MMR */ + bool hub_present; /* false means UV hubless system */ + bool pch_owner; /* indicates this hub owns PCH */ +}; + +struct uv_cpu_nmi_s { + struct uv_hub_nmi_s *hub; + int state; + int pinging; + int queries; + int pings; +}; + +DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); + +#define uv_hub_nmi this_cpu_read(uv_cpu_nmi.hub) +#define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu)) +#define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) + +/* uv_cpu_nmi_states */ +#define UV_NMI_STATE_OUT 0 +#define UV_NMI_STATE_IN 1 +#define UV_NMI_STATE_DUMP 2 +#define UV_NMI_STATE_DUMP_DONE 3 + +/* + * Get the minimum revision number of the hub chips within the partition. + * (See UVx_HUB_REVISION_BASE above for specific values.) + */ +static inline int uv_get_min_hub_revision_id(void) +{ + return uv_hub_info->hub_revision; +} + +#endif /* CONFIG_X86_64 */ +#endif /* _ASM_X86_UV_UV_HUB_H */ diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h new file mode 100644 index 000000000..d6b17c760 --- /dev/null +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -0,0 +1,38 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ definitions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_IRQ_H +#define _ASM_X86_UV_UV_IRQ_H + +/* If a generic version of this structure gets defined, eliminate this one. */ +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; + +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; + +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); + +#endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h new file mode 100644 index 000000000..57fa67373 --- /dev/null +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -0,0 +1,4637 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * HPE UV MMR definitions + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_MMRS_H +#define _ASM_X86_UV_UV_MMRS_H + +/* + * This file contains MMR definitions for all UV hubs types. + * + * To minimize coding differences between hub types, the symbols are + * grouped by architecture types. + * + * UVH - definitions common to all UV hub types. + * UVXH - definitions common to UVX class (2, 3, 4). + * UVYH - definitions common to UVY class (5). + * UV5H - definitions specific to UV type 5 hub. + * UV4AH - definitions specific to UV type 4A hub. + * UV4H - definitions specific to UV type 4 hub. + * UV3H - definitions specific to UV type 3 hub. + * UV2H - definitions specific to UV type 2 hub. + * + * If the MMR exists on all hub types but have different addresses, + * use a conditional operator to define the value at runtime. Any + * that are not defined are blank. + * (UV4A variations only generated if different from uv4) + * #define UVHxxx ( + * is_uv(UV5) ? UV5Hxxx value : + * is_uv(UV4A) ? UV4AHxxx value : + * is_uv(UV4) ? UV4Hxxx value : + * is_uv(UV3) ? UV3Hxxx value : + * is_uv(UV2) ? UV2Hxxx value : + * or ) + * + * Class UVX has UVs (2|3|4|4A). + * Class UVY has UVs (5). + * + * union uvh_xxx { + * unsigned long v; + * struct uvh_xxx_s { # Common fields only + * } s; + * struct uv5h_xxx_s { # Full UV5 definition (*) + * } s5; + * struct uv4ah_xxx_s { # Full UV4A definition (*) + * } s4a; + * struct uv4h_xxx_s { # Full UV4 definition (*) + * } s4; + * struct uv3h_xxx_s { # Full UV3 definition (*) + * } s3; + * struct uv2h_xxx_s { # Full UV2 definition (*) + * } s2; + * }; + * (* - if present and different than the common struct) + * + * Only essential differences are enumerated. For example, if the address is + * the same for all UV's, only a single #define is generated. Likewise, + * if the contents is the same for all hubs, only the "s" structure is + * generated. + * + * (GEN Flags: undefs=function) + */ + + /* UV bit masks */ +#define UV2 (1 << 0) +#define UV3 (1 << 1) +#define UV4 (1 << 2) +#define UV4A (1 << 3) +#define UV5 (1 << 4) +#define UVX (UV2|UV3|UV4) +#define UVY (UV5) +#define UV_ANY (~0) + + + + +#define UV_MMR_ENABLE (1UL << 63) + +#define UV1_HUB_PART_NUMBER 0x88a5 +#define UV2_HUB_PART_NUMBER 0x8eb8 +#define UV2_HUB_PART_NUMBER_X 0x1111 +#define UV3_HUB_PART_NUMBER 0x9578 +#define UV3_HUB_PART_NUMBER_X 0x4321 +#define UV4_HUB_PART_NUMBER 0x99a1 +#define UV5_HUB_PART_NUMBER 0xa171 + +/* Error function to catch undefined references */ +extern unsigned long uv_undefined(char *str); + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0 0x70000UL + +/* UVH common defines*/ +#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL + +/* UVXH common defines */ +#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 +#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL + +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT 1 +#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT 2 +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT 3 +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT 4 +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT 5 +#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT 6 +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT 7 +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT 8 +#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT 9 +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT 10 +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT 11 +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT 12 +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT 13 +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT 14 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT 15 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK 0x0000000000008000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 16 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 17 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT 18 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT 19 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT 20 +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT 21 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT 22 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT 23 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 24 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 25 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT 26 +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT 27 +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT 28 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT 29 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 30 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 31 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT 32 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT 33 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT 34 +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT 35 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK 0x0000000800000000UL +#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT 36 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT 37 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK 0x0000002000000000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 38 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000004000000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 39 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000008000000000UL +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 40 +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000010000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 41 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000020000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 42 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000040000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 43 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000080000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 44 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000100000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 45 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000200000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 46 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000400000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 47 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000800000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 48 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0001000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 49 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0002000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 50 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0004000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 51 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0008000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 52 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0010000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 53 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0020000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 54 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0040000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 55 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0080000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 56 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0100000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 57 +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0200000000000000UL +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 58 +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0400000000000000UL +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 59 +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0800000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 60 +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x1000000000000000UL +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 61 +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x2000000000000000UL + +/* UV4 unique defines */ +#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1 +#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL +#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 +#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +/* UV2 unique defines */ +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK ( \ + is_uv(UV4) ? 0x1000000000000000UL : \ + is_uv(UV3) ? 0x0040000000000000UL : \ + is_uv(UV2) ? 0x0040000000000000UL : \ + 0) +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \ + is_uv(UV4) ? 60 : \ + is_uv(UV3) ? 54 : \ + is_uv(UV2) ? 54 : \ + -1) + +union uvh_event_occurred0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long rsvd_1_63:63; + } s; + + /* UVXH common struct */ + struct uvxh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long rsvd_1:1; + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long rsvd_10:1; + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rsvd_17_63:47; + } sx; + + /* UVYH common struct */ + struct uvyh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } sy; + + /* UV5 unique struct */ + struct uv5h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } s5; + + /* UV4 unique struct */ + struct uv4h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rtq0_aoerr0:1; /* RW */ + unsigned long rtq1_aoerr0:1; /* RW */ + unsigned long rtq2_aoerr0:1; /* RW */ + unsigned long rtq3_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rtq0_aoerr1:1; /* RW */ + unsigned long rtq1_aoerr1:1; /* RW */ + unsigned long rtq2_aoerr1:1; /* RW */ + unsigned long rtq3_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s3; + + /* UV2 unique struct */ + struct uv2h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s2; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL + + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED1 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED1 0x70080UL + + + +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT 0 +#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK 0x0000000000000001UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT 1 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT 2 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT 3 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT 4 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT 5 +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT 6 +#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT 7 +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT 8 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT 9 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT 10 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT 11 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT 12 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT 13 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT 14 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT 15 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK 0x0000000000008000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT 16 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT 17 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT 18 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT 19 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT 20 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT 21 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT 22 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT 23 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT 24 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT 25 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT 26 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT 27 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT 28 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT 29 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT 30 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT 31 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT 32 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT 33 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT 34 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT 35 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK 0x0000000800000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT 36 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT 37 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK 0x0000002000000000UL + +/* UV4 unique defines */ +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT 0 +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000001UL +#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT 1 +#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT 2 +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000004UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 3 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000000008UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 4 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000000010UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 5 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000000020UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 6 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000000040UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 7 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000000080UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 8 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000000100UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 9 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000000000200UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 10 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 11 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000000000800UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 12 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000000001000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 13 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000000002000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 14 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000000004000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 15 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000000008000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 16 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000000010000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 17 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 18 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT 19 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT 20 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT 21 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT 22 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT 23 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT 24 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT 25 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT 26 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 27 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 28 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 29 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 30 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 31 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 32 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 33 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 34 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 35 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 36 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 37 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 38 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 39 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 40 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 41 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 42 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT 43 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT 44 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT 45 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT 46 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT 47 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT 48 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT 49 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT 50 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK 0x0004000000000000UL + +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT 0 +#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1 +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33 +#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49 +#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50 +#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51 +#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL + +/* UV2 unique defines */ +#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT 0 +#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1 +#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17 +#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33 +#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49 +#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50 +#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51 +#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL + +#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK ( \ + is_uv(UV5) ? 0x0000000000000002UL : \ + 0) +#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT ( \ + is_uv(UV5) ? 1 : \ + -1) + +union uvyh_event_occurred1_u { + unsigned long v; + + /* UVYH common struct */ + struct uvyh_event_occurred1_s { + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long bau_data:1; /* RW */ + unsigned long proc_general:1; /* RW */ + unsigned long xh_tlb_int0:1; /* RW */ + unsigned long xh_tlb_int1:1; /* RW */ + unsigned long xh_tlb_int2:1; /* RW */ + unsigned long xh_tlb_int3:1; /* RW */ + unsigned long xh_tlb_int4:1; /* RW */ + unsigned long xh_tlb_int5:1; /* RW */ + unsigned long rdm_tlb_int0:1; /* RW */ + unsigned long rdm_tlb_int1:1; /* RW */ + unsigned long rdm_tlb_int2:1; /* RW */ + unsigned long rdm_tlb_int3:1; /* RW */ + unsigned long rdm_tlb_int4:1; /* RW */ + unsigned long rdm_tlb_int5:1; /* RW */ + unsigned long rdm_tlb_int6:1; /* RW */ + unsigned long rdm_tlb_int7:1; /* RW */ + unsigned long rdm_tlb_int8:1; /* RW */ + unsigned long rdm_tlb_int9:1; /* RW */ + unsigned long rdm_tlb_int10:1; /* RW */ + unsigned long rdm_tlb_int11:1; /* RW */ + unsigned long rdm_tlb_int12:1; /* RW */ + unsigned long rdm_tlb_int13:1; /* RW */ + unsigned long rdm_tlb_int14:1; /* RW */ + unsigned long rdm_tlb_int15:1; /* RW */ + unsigned long rdm_tlb_int16:1; /* RW */ + unsigned long rdm_tlb_int17:1; /* RW */ + unsigned long rdm_tlb_int18:1; /* RW */ + unsigned long rdm_tlb_int19:1; /* RW */ + unsigned long rdm_tlb_int20:1; /* RW */ + unsigned long rdm_tlb_int21:1; /* RW */ + unsigned long rdm_tlb_int22:1; /* RW */ + unsigned long rdm_tlb_int23:1; /* RW */ + unsigned long rsvd_38_63:26; + } sy; + + /* UV5 unique struct */ + struct uv5h_event_occurred1_s { + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long bau_data:1; /* RW */ + unsigned long proc_general:1; /* RW */ + unsigned long xh_tlb_int0:1; /* RW */ + unsigned long xh_tlb_int1:1; /* RW */ + unsigned long xh_tlb_int2:1; /* RW */ + unsigned long xh_tlb_int3:1; /* RW */ + unsigned long xh_tlb_int4:1; /* RW */ + unsigned long xh_tlb_int5:1; /* RW */ + unsigned long rdm_tlb_int0:1; /* RW */ + unsigned long rdm_tlb_int1:1; /* RW */ + unsigned long rdm_tlb_int2:1; /* RW */ + unsigned long rdm_tlb_int3:1; /* RW */ + unsigned long rdm_tlb_int4:1; /* RW */ + unsigned long rdm_tlb_int5:1; /* RW */ + unsigned long rdm_tlb_int6:1; /* RW */ + unsigned long rdm_tlb_int7:1; /* RW */ + unsigned long rdm_tlb_int8:1; /* RW */ + unsigned long rdm_tlb_int9:1; /* RW */ + unsigned long rdm_tlb_int10:1; /* RW */ + unsigned long rdm_tlb_int11:1; /* RW */ + unsigned long rdm_tlb_int12:1; /* RW */ + unsigned long rdm_tlb_int13:1; /* RW */ + unsigned long rdm_tlb_int14:1; /* RW */ + unsigned long rdm_tlb_int15:1; /* RW */ + unsigned long rdm_tlb_int16:1; /* RW */ + unsigned long rdm_tlb_int17:1; /* RW */ + unsigned long rdm_tlb_int18:1; /* RW */ + unsigned long rdm_tlb_int19:1; /* RW */ + unsigned long rdm_tlb_int20:1; /* RW */ + unsigned long rdm_tlb_int21:1; /* RW */ + unsigned long rdm_tlb_int22:1; /* RW */ + unsigned long rdm_tlb_int23:1; /* RW */ + unsigned long rsvd_38_63:26; + } s5; + + /* UV4 unique struct */ + struct uv4h_event_occurred1_s { + unsigned long profile_int:1; /* RW */ + unsigned long bau_data:1; /* RW */ + unsigned long proc_general:1; /* RW */ + unsigned long gr0_tlb_int0:1; /* RW */ + unsigned long gr0_tlb_int1:1; /* RW */ + unsigned long gr0_tlb_int2:1; /* RW */ + unsigned long gr0_tlb_int3:1; /* RW */ + unsigned long gr0_tlb_int4:1; /* RW */ + unsigned long gr0_tlb_int5:1; /* RW */ + unsigned long gr0_tlb_int6:1; /* RW */ + unsigned long gr0_tlb_int7:1; /* RW */ + unsigned long gr0_tlb_int8:1; /* RW */ + unsigned long gr0_tlb_int9:1; /* RW */ + unsigned long gr0_tlb_int10:1; /* RW */ + unsigned long gr0_tlb_int11:1; /* RW */ + unsigned long gr0_tlb_int12:1; /* RW */ + unsigned long gr0_tlb_int13:1; /* RW */ + unsigned long gr0_tlb_int14:1; /* RW */ + unsigned long gr0_tlb_int15:1; /* RW */ + unsigned long gr0_tlb_int16:1; /* RW */ + unsigned long gr0_tlb_int17:1; /* RW */ + unsigned long gr0_tlb_int18:1; /* RW */ + unsigned long gr0_tlb_int19:1; /* RW */ + unsigned long gr0_tlb_int20:1; /* RW */ + unsigned long gr0_tlb_int21:1; /* RW */ + unsigned long gr0_tlb_int22:1; /* RW */ + unsigned long gr0_tlb_int23:1; /* RW */ + unsigned long gr1_tlb_int0:1; /* RW */ + unsigned long gr1_tlb_int1:1; /* RW */ + unsigned long gr1_tlb_int2:1; /* RW */ + unsigned long gr1_tlb_int3:1; /* RW */ + unsigned long gr1_tlb_int4:1; /* RW */ + unsigned long gr1_tlb_int5:1; /* RW */ + unsigned long gr1_tlb_int6:1; /* RW */ + unsigned long gr1_tlb_int7:1; /* RW */ + unsigned long gr1_tlb_int8:1; /* RW */ + unsigned long gr1_tlb_int9:1; /* RW */ + unsigned long gr1_tlb_int10:1; /* RW */ + unsigned long gr1_tlb_int11:1; /* RW */ + unsigned long gr1_tlb_int12:1; /* RW */ + unsigned long gr1_tlb_int13:1; /* RW */ + unsigned long gr1_tlb_int14:1; /* RW */ + unsigned long gr1_tlb_int15:1; /* RW */ + unsigned long gr1_tlb_int16:1; /* RW */ + unsigned long gr1_tlb_int17:1; /* RW */ + unsigned long gr1_tlb_int18:1; /* RW */ + unsigned long gr1_tlb_int19:1; /* RW */ + unsigned long gr1_tlb_int20:1; /* RW */ + unsigned long gr1_tlb_int21:1; /* RW */ + unsigned long gr1_tlb_int22:1; /* RW */ + unsigned long gr1_tlb_int23:1; /* RW */ + unsigned long rsvd_51_63:13; + } s4; + + /* UV3 unique struct */ + struct uv3h_event_occurred1_s { + unsigned long bau_data:1; /* RW */ + unsigned long power_management_req:1; /* RW */ + unsigned long message_accelerator_int0:1; /* RW */ + unsigned long message_accelerator_int1:1; /* RW */ + unsigned long message_accelerator_int2:1; /* RW */ + unsigned long message_accelerator_int3:1; /* RW */ + unsigned long message_accelerator_int4:1; /* RW */ + unsigned long message_accelerator_int5:1; /* RW */ + unsigned long message_accelerator_int6:1; /* RW */ + unsigned long message_accelerator_int7:1; /* RW */ + unsigned long message_accelerator_int8:1; /* RW */ + unsigned long message_accelerator_int9:1; /* RW */ + unsigned long message_accelerator_int10:1; /* RW */ + unsigned long message_accelerator_int11:1; /* RW */ + unsigned long message_accelerator_int12:1; /* RW */ + unsigned long message_accelerator_int13:1; /* RW */ + unsigned long message_accelerator_int14:1; /* RW */ + unsigned long message_accelerator_int15:1; /* RW */ + unsigned long gr0_tlb_int0:1; /* RW */ + unsigned long gr0_tlb_int1:1; /* RW */ + unsigned long gr0_tlb_int2:1; /* RW */ + unsigned long gr0_tlb_int3:1; /* RW */ + unsigned long gr0_tlb_int4:1; /* RW */ + unsigned long gr0_tlb_int5:1; /* RW */ + unsigned long gr0_tlb_int6:1; /* RW */ + unsigned long gr0_tlb_int7:1; /* RW */ + unsigned long gr0_tlb_int8:1; /* RW */ + unsigned long gr0_tlb_int9:1; /* RW */ + unsigned long gr0_tlb_int10:1; /* RW */ + unsigned long gr0_tlb_int11:1; /* RW */ + unsigned long gr0_tlb_int12:1; /* RW */ + unsigned long gr0_tlb_int13:1; /* RW */ + unsigned long gr0_tlb_int14:1; /* RW */ + unsigned long gr0_tlb_int15:1; /* RW */ + unsigned long gr1_tlb_int0:1; /* RW */ + unsigned long gr1_tlb_int1:1; /* RW */ + unsigned long gr1_tlb_int2:1; /* RW */ + unsigned long gr1_tlb_int3:1; /* RW */ + unsigned long gr1_tlb_int4:1; /* RW */ + unsigned long gr1_tlb_int5:1; /* RW */ + unsigned long gr1_tlb_int6:1; /* RW */ + unsigned long gr1_tlb_int7:1; /* RW */ + unsigned long gr1_tlb_int8:1; /* RW */ + unsigned long gr1_tlb_int9:1; /* RW */ + unsigned long gr1_tlb_int10:1; /* RW */ + unsigned long gr1_tlb_int11:1; /* RW */ + unsigned long gr1_tlb_int12:1; /* RW */ + unsigned long gr1_tlb_int13:1; /* RW */ + unsigned long gr1_tlb_int14:1; /* RW */ + unsigned long gr1_tlb_int15:1; /* RW */ + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rsvd_52_63:12; + } s3; + + /* UV2 unique struct */ + struct uv2h_event_occurred1_s { + unsigned long bau_data:1; /* RW */ + unsigned long power_management_req:1; /* RW */ + unsigned long message_accelerator_int0:1; /* RW */ + unsigned long message_accelerator_int1:1; /* RW */ + unsigned long message_accelerator_int2:1; /* RW */ + unsigned long message_accelerator_int3:1; /* RW */ + unsigned long message_accelerator_int4:1; /* RW */ + unsigned long message_accelerator_int5:1; /* RW */ + unsigned long message_accelerator_int6:1; /* RW */ + unsigned long message_accelerator_int7:1; /* RW */ + unsigned long message_accelerator_int8:1; /* RW */ + unsigned long message_accelerator_int9:1; /* RW */ + unsigned long message_accelerator_int10:1; /* RW */ + unsigned long message_accelerator_int11:1; /* RW */ + unsigned long message_accelerator_int12:1; /* RW */ + unsigned long message_accelerator_int13:1; /* RW */ + unsigned long message_accelerator_int14:1; /* RW */ + unsigned long message_accelerator_int15:1; /* RW */ + unsigned long gr0_tlb_int0:1; /* RW */ + unsigned long gr0_tlb_int1:1; /* RW */ + unsigned long gr0_tlb_int2:1; /* RW */ + unsigned long gr0_tlb_int3:1; /* RW */ + unsigned long gr0_tlb_int4:1; /* RW */ + unsigned long gr0_tlb_int5:1; /* RW */ + unsigned long gr0_tlb_int6:1; /* RW */ + unsigned long gr0_tlb_int7:1; /* RW */ + unsigned long gr0_tlb_int8:1; /* RW */ + unsigned long gr0_tlb_int9:1; /* RW */ + unsigned long gr0_tlb_int10:1; /* RW */ + unsigned long gr0_tlb_int11:1; /* RW */ + unsigned long gr0_tlb_int12:1; /* RW */ + unsigned long gr0_tlb_int13:1; /* RW */ + unsigned long gr0_tlb_int14:1; /* RW */ + unsigned long gr0_tlb_int15:1; /* RW */ + unsigned long gr1_tlb_int0:1; /* RW */ + unsigned long gr1_tlb_int1:1; /* RW */ + unsigned long gr1_tlb_int2:1; /* RW */ + unsigned long gr1_tlb_int3:1; /* RW */ + unsigned long gr1_tlb_int4:1; /* RW */ + unsigned long gr1_tlb_int5:1; /* RW */ + unsigned long gr1_tlb_int6:1; /* RW */ + unsigned long gr1_tlb_int7:1; /* RW */ + unsigned long gr1_tlb_int8:1; /* RW */ + unsigned long gr1_tlb_int9:1; /* RW */ + unsigned long gr1_tlb_int10:1; /* RW */ + unsigned long gr1_tlb_int11:1; /* RW */ + unsigned long gr1_tlb_int12:1; /* RW */ + unsigned long gr1_tlb_int13:1; /* RW */ + unsigned long gr1_tlb_int14:1; /* RW */ + unsigned long gr1_tlb_int15:1; /* RW */ + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rsvd_52_63:12; + } s2; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED1_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL + + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED2 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED2 0x70100UL + + + +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 0 +#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000000001UL +#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 1 +#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT 2 +#define UVYH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT 3 +#define UVYH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT 4 +#define UVYH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT 5 +#define UVYH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT 6 +#define UVYH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT 7 +#define UVYH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT 8 +#define UVYH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT 9 +#define UVYH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT 10 +#define UVYH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT 11 +#define UVYH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT 12 +#define UVYH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT 13 +#define UVYH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT 14 +#define UVYH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT 15 +#define UVYH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000008000UL +#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT 16 +#define UVYH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT 17 +#define UVYH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT 18 +#define UVYH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT 19 +#define UVYH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT 20 +#define UVYH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT 21 +#define UVYH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT 22 +#define UVYH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT 23 +#define UVYH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT 24 +#define UVYH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT 25 +#define UVYH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT 26 +#define UVYH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT 27 +#define UVYH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT 28 +#define UVYH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT 29 +#define UVYH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT 30 +#define UVYH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT 31 +#define UVYH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT 32 +#define UVYH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT 33 +#define UVYH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000200000000UL + +/* UV4 unique defines */ +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15 +#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL +#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16 +#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL +#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17 +#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18 +#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19 +#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20 +#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21 +#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22 +#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23 +#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24 +#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25 +#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26 +#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27 +#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28 +#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29 +#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30 +#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31 +#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32 +#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33 +#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34 +#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35 +#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36 +#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37 +#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38 +#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39 +#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40 +#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41 +#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42 +#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43 +#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44 +#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45 +#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46 +#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47 +#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48 +#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49 +#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL + +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +/* UV2 unique defines */ +#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +#define UVH_EVENT_OCCURRED2_RTC_1_MASK ( \ + is_uv(UV5) ? 0x0000000000000008UL : \ + is_uv(UV4) ? 0x0000000000080000UL : \ + is_uv(UV3) ? 0x0000000000000002UL : \ + is_uv(UV2) ? 0x0000000000000002UL : \ + 0) +#define UVH_EVENT_OCCURRED2_RTC_1_SHFT ( \ + is_uv(UV5) ? 3 : \ + is_uv(UV4) ? 19 : \ + is_uv(UV3) ? 1 : \ + is_uv(UV2) ? 1 : \ + -1) + +union uvyh_event_occurred2_u { + unsigned long v; + + /* UVYH common struct */ + struct uvyh_event_occurred2_s { + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_34_63:30; + } sy; + + /* UV5 unique struct */ + struct uv5h_event_occurred2_s { + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_34_63:30; + } s5; + + /* UV4 unique struct */ + struct uv4h_event_occurred2_s { + unsigned long message_accelerator_int0:1; /* RW */ + unsigned long message_accelerator_int1:1; /* RW */ + unsigned long message_accelerator_int2:1; /* RW */ + unsigned long message_accelerator_int3:1; /* RW */ + unsigned long message_accelerator_int4:1; /* RW */ + unsigned long message_accelerator_int5:1; /* RW */ + unsigned long message_accelerator_int6:1; /* RW */ + unsigned long message_accelerator_int7:1; /* RW */ + unsigned long message_accelerator_int8:1; /* RW */ + unsigned long message_accelerator_int9:1; /* RW */ + unsigned long message_accelerator_int10:1; /* RW */ + unsigned long message_accelerator_int11:1; /* RW */ + unsigned long message_accelerator_int12:1; /* RW */ + unsigned long message_accelerator_int13:1; /* RW */ + unsigned long message_accelerator_int14:1; /* RW */ + unsigned long message_accelerator_int15:1; /* RW */ + unsigned long rtc_interval_int:1; /* RW */ + unsigned long bau_dashboard_int:1; /* RW */ + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_50_63:14; + } s4; + + /* UV3 unique struct */ + struct uv3h_event_occurred2_s { + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_32_63:32; + } s3; + + /* UV2 unique struct */ + struct uv2h_event_occurred2_s { + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_32_63:32; + } s2; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED2_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL + + +/* ========================================================================= */ +/* UVH_EXTIO_INT0_BROADCAST */ +/* ========================================================================= */ +#define UVH_EXTIO_INT0_BROADCAST 0x61448UL + +/* UVH common defines*/ +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0 +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL + + +union uvh_extio_int0_broadcast_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s; + + /* UV5 unique struct */ + struct uv5h_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s5; + + /* UV4 unique struct */ + struct uv4h_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s4; + + /* UV3 unique struct */ + struct uv3h_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s3; + + /* UV2 unique struct */ + struct uv2h_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s2; +}; + +/* ========================================================================= */ +/* UVH_GR0_GAM_GR_CONFIG */ +/* ========================================================================= */ +#define UVH_GR0_GAM_GR_CONFIG ( \ + is_uv(UV5) ? 0x600028UL : \ + is_uv(UV4) ? 0x600028UL : \ + is_uv(UV3) ? 0xc00028UL : \ + is_uv(UV2) ? 0xc00028UL : \ + 0) + + + +/* UVYH common defines */ +#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10 +#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL + +/* UV4 unique defines */ +#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10 +#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL + +/* UV3 unique defines */ +#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0 +#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL +#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10 +#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL + +/* UV2 unique defines */ +#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT 0 +#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK 0x000000000000000fUL + + +union uvyh_gr0_gam_gr_config_u { + unsigned long v; + + /* UVYH common struct */ + struct uvyh_gr0_gam_gr_config_s { + unsigned long rsvd_0_9:10; + unsigned long subspace:1; /* RW */ + unsigned long rsvd_11_63:53; + } sy; + + /* UV5 unique struct */ + struct uv5h_gr0_gam_gr_config_s { + unsigned long rsvd_0_9:10; + unsigned long subspace:1; /* RW */ + unsigned long rsvd_11_63:53; + } s5; + + /* UV4 unique struct */ + struct uv4h_gr0_gam_gr_config_s { + unsigned long rsvd_0_9:10; + unsigned long subspace:1; /* RW */ + unsigned long rsvd_11_63:53; + } s4; + + /* UV3 unique struct */ + struct uv3h_gr0_gam_gr_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long undef_6_9:4; /* Undefined */ + unsigned long subspace:1; /* RW */ + unsigned long reserved:53; + } s3; + + /* UV2 unique struct */ + struct uv2h_gr0_gam_gr_config_s { + unsigned long n_gr:4; /* RW */ + unsigned long reserved:60; + } s2; +}; + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT0_CONFIG ( \ + is_uv(UV4) ? 0x61b00UL : \ + is_uv(UV3) ? 0x61b00UL : \ + is_uv(UV2) ? 0x61b00UL : \ + uv_undefined("UVH_GR0_TLB_INT0_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + + +union uvh_gr0_tlb_int0_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT1_CONFIG */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT1_CONFIG ( \ + is_uv(UV4) ? 0x61b40UL : \ + is_uv(UV3) ? 0x61b40UL : \ + is_uv(UV2) ? 0x61b40UL : \ + uv_undefined("UVH_GR0_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + + +union uvh_gr0_tlb_int1_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT0_CONFIG ( \ + is_uv(UV4) ? 0x62100UL : \ + is_uv(UV3) ? 0x61f00UL : \ + is_uv(UV2) ? 0x61f00UL : \ + uv_undefined("UVH_GR1_TLB_INT0_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + + +union uvh_gr1_tlb_int0_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT1_CONFIG */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT1_CONFIG ( \ + is_uv(UV4) ? 0x62140UL : \ + is_uv(UV3) ? 0x61f40UL : \ + is_uv(UV2) ? 0x61f40UL : \ + uv_undefined("UVH_GR1_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + + +union uvh_gr1_tlb_int1_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPB 0x22080UL + +/* UVH common defines*/ +#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + + +union uvh_int_cmpb_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s; + + /* UV5 unique struct */ + struct uv5h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s5; + + /* UV4 unique struct */ + struct uv4h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + + /* UV3 unique struct */ + struct uv3h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; + + /* UV2 unique struct */ + struct uv2h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; +}; + +/* ========================================================================= */ +/* UVH_IPI_INT */ +/* ========================================================================= */ +#define UVH_IPI_INT 0x60500UL + +/* UVH common defines*/ +#define UVH_IPI_INT_VECTOR_SHFT 0 +#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UVH_IPI_INT_DESTMODE_SHFT 11 +#define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UVH_IPI_INT_APIC_ID_SHFT 16 +#define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UVH_IPI_INT_SEND_SHFT 63 +#define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL + + +union uvh_ipi_int_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s; + + /* UV5 unique struct */ + struct uv5h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s5; + + /* UV4 unique struct */ + struct uv4h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s4; + + /* UV3 unique struct */ + struct uv3h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s3; + + /* UV2 unique struct */ + struct uv2h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_NODE_ID */ +/* ========================================================================= */ +#define UVH_NODE_ID 0x0UL + +/* UVH common defines*/ +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32 +#define UVH_NODE_ID_NI_PORT_SHFT 57 + +/* UVXH common defines */ +#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL + +/* UVYH common defines */ +#define UVYH_NODE_ID_NODE_ID_MASK 0x0000007f00000000UL +#define UVYH_NODE_ID_NI_PORT_MASK 0x7e00000000000000UL + +/* UV4 unique defines */ +#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV4H_NODE_ID_RESERVED_2_SHFT 49 +#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL + +/* UV3 unique defines */ +#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV3H_NODE_ID_RESERVED_2_SHFT 49 +#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL + + +union uvh_node_id_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long rsvd_32_63:32; + } s; + + /* UVXH common struct */ + struct uvxh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } sx; + + /* UVYH common struct */ + struct uvyh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ + unsigned long rsvd_63:1; + } sy; + + /* UV5 unique struct */ + struct uv5h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ + unsigned long rsvd_63:1; + } s5; + + /* UV4 unique struct */ + struct uv4h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s4; + + /* UV3 unique struct */ + struct uv3h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s3; + + /* UV2 unique struct */ + struct uv2h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s2; +}; + +/* ========================================================================= */ +/* UVH_NODE_PRESENT_0 */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_0 ( \ + is_uv(UV5) ? 0x1400UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_0_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_0_NODES_MASK 0xffffffffffffffffUL + + +union uvh_node_present_0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_NODE_PRESENT_1 */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_1 ( \ + is_uv(UV5) ? 0x1408UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_1_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_1_NODES_MASK 0xffffffffffffffffUL + + +union uvh_node_present_1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_NODE_PRESENT_TABLE */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_TABLE ( \ + is_uv(UV4) ? 0x1400UL : \ + is_uv(UV3) ? 0x1400UL : \ + is_uv(UV2) ? 0x1400UL : \ + 0) + +#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ + is_uv(UV4) ? 4 : \ + is_uv(UV3) ? 16 : \ + is_uv(UV2) ? 16 : \ + 0) + + +/* UVXH common defines */ +#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + + +union uvh_node_present_table_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_ADDR_MAP_CONFIG */ +/* ========================================================================= */ +#define UVH_RH10_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV5) ? 0x470000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000001c0UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT 12 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK 0x0000000000001000UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL + + +union uvh_rh10_gam_addr_map_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_GRU_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 0x4700b0UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x0070000000000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) + +union uvh_rh10_gam_gru_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV5) ? 0x473000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 */ +/* ========================================================================= */ +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV5) ? 0x474000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV5) ? 0x473800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV5) ? 128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 */ +/* ========================================================================= */ +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV5) ? 0x474800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV5) ? 128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH10_GAM_MMR_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 0x470090UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) + +union uvh_rh10_gam_mmr_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ADDR_MAP_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV4) ? 0x480000UL : \ + is_uv(UV3) ? 0x1600000UL : \ + is_uv(UV2) ? 0x1600000UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000003c0UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL + +/* UV2 unique defines */ +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL + + +union uvh_rh_gam_addr_map_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800c8UL : \ + is_uv(UV3) ? 0x16000c8UL : \ + is_uv(UV2) ? 0x16000c8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_0_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800d0UL : \ + is_uv(UV3) ? 0x16000d0UL : \ + is_uv(UV2) ? 0x16000d0UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL + + +union uvh_rh_gam_alias_0_redirect_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800d8UL : \ + is_uv(UV3) ? 0x16000d8UL : \ + is_uv(UV2) ? 0x16000d8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_1_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800e0UL : \ + is_uv(UV3) ? 0x16000e0UL : \ + is_uv(UV2) ? 0x16000e0UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL + + +union uvh_rh_gam_alias_1_redirect_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800e8UL : \ + is_uv(UV3) ? 0x16000e8UL : \ + is_uv(UV2) ? 0x16000e8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_2_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_2_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_overlay_config_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800f0UL : \ + is_uv(UV3) ? 0x16000f0UL : \ + is_uv(UV2) ? 0x16000f0UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL + + +union uvh_rh_gam_alias_2_redirect_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_2_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_redirect_config_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480010UL : \ + is_uv(UV3) ? 0x1600010UL : \ + is_uv(UV2) ? 0x1600010UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x00f0000000000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffffc000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT 62 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK 0x4000000000000000UL + +/* UV2 unique defines */ +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffff0000000UL : \ + is_uv(UV2) ? 0x00003ffff0000000UL : \ + 0) +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 28 : \ + is_uv(UV2) ? 28 : \ + -1) + +union uvh_rh_gam_gru_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_45:46; + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_45:46; + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4A unique struct */ + struct uv4ah_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:26; /* RW */ + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:20; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_61:6; + unsigned long mode:1; /* RW */ + unsigned long enable:1; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG ( \ + is_uv(UV2) ? 0x1600030UL : \ + 0) + + + +/* UV2 unique defines */ +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT 27 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK 0x00003ffff8000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT 46 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK 0x000fc00000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT 52 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV2) ? 27 : \ + uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT")) + +union uvh_rh_gam_mmioh_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV4) ? 0x483000UL : \ + is_uv(UV3) ? 0x1603000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4A unique struct */ + struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV4) ? 0x484000UL : \ + is_uv(UV3) ? 0x1604000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4A unique struct */ + struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV4) ? 0x483800UL : \ + is_uv(UV3) ? 0x1603800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + + +union uvh_rh_gam_mmioh_redirect_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV4) ? 0x484800UL : \ + is_uv(UV3) ? 0x1604800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + + +union uvh_rh_gam_mmioh_redirect_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480028UL : \ + is_uv(UV3) ? 0x1600028UL : \ + is_uv(UV2) ? 0x1600028UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 26 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 26 : \ + is_uv(UV2) ? 26 : \ + -1) + +union uvh_rh_gam_mmr_overlay_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmr_overlay_config_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmr_overlay_config_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmr_overlay_config_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmr_overlay_config_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmr_overlay_config_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RTC */ +/* ========================================================================= */ +#define UVH_RTC ( \ + is_uv(UV5) ? 0xe0000UL : \ + is_uv(UV4) ? 0xe0000UL : \ + is_uv(UV3) ? 0x340000UL : \ + is_uv(UV2) ? 0x340000UL : \ + 0) + +/* UVH common defines*/ +#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + + +union uvh_rtc_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s; + + /* UV5 unique struct */ + struct uv5h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s5; + + /* UV4 unique struct */ + struct uv4h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + + /* UV3 unique struct */ + struct uv3h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; + + /* UV2 unique struct */ + struct uv2h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; +}; + +/* ========================================================================= */ +/* UVH_RTC1_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC1_INT_CONFIG 0x615c0UL + +/* UVH common defines*/ +#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + + +union uvh_rtc1_int_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s; + + /* UV5 unique struct */ + struct uv5h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s5; + + /* UV4 unique struct */ + struct uv4h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_SCRATCH5 */ +/* ========================================================================= */ +#define UVH_SCRATCH5 ( \ + is_uv(UV5) ? 0xb0200UL : \ + is_uv(UV4) ? 0xb0200UL : \ + is_uv(UV3) ? 0x2d0200UL : \ + is_uv(UV2) ? 0x2d0200UL : \ + 0) +#define UV5H_SCRATCH5 0xb0200UL +#define UV4H_SCRATCH5 0xb0200UL +#define UV3H_SCRATCH5 0x2d0200UL +#define UV2H_SCRATCH5 0x2d0200UL + +/* UVH common defines*/ +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UVXH common defines */ +#define UVXH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UVYH common defines */ +#define UVYH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVYH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UV5 unique defines */ +#define UV5H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV5H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UV4 unique defines */ +#define UV4H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV4H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UV3 unique defines */ +#define UV3H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +/* UV2 unique defines */ +#define UV2H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + + +union uvh_scratch5_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sx; + + /* UVYH common struct */ + struct uvyh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s5; + + /* UV4 unique struct */ + struct uv4h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_SCRATCH5_ALIAS */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS ( \ + is_uv(UV5) ? 0xb0208UL : \ + is_uv(UV4) ? 0xb0208UL : \ + is_uv(UV3) ? 0x2d0208UL : \ + is_uv(UV2) ? 0x2d0208UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS 0xb0208UL +#define UV4H_SCRATCH5_ALIAS 0xb0208UL +#define UV3H_SCRATCH5_ALIAS 0x2d0208UL +#define UV2H_SCRATCH5_ALIAS 0x2d0208UL + + +/* ========================================================================= */ +/* UVH_SCRATCH5_ALIAS_2 */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS_2 ( \ + is_uv(UV5) ? 0xb0210UL : \ + is_uv(UV4) ? 0xb0210UL : \ + is_uv(UV3) ? 0x2d0210UL : \ + is_uv(UV2) ? 0x2d0210UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL + + + +#endif /* _ASM_X86_UV_UV_MMRS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h new file mode 100644 index 000000000..2963a2f5d --- /dev/null +++ b/arch/x86/include/asm/vdso.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VDSO_H +#define _ASM_X86_VDSO_H + +#include +#include +#include + +#ifndef __ASSEMBLER__ + +#include + +struct vdso_image { + void *data; + unsigned long size; /* Always a multiple of PAGE_SIZE */ + + unsigned long alt, alt_len; + unsigned long extable_base, extable_len; + const void *extable; + + long sym_vvar_start; /* Negative offset to the vvar area */ + + long sym_vvar_page; + long sym_pvclock_page; + long sym_hvclock_page; + long sym_timens_page; + long sym_VDSO32_NOTE_MASK; + long sym___kernel_sigreturn; + long sym___kernel_rt_sigreturn; + long sym___kernel_vsyscall; + long sym_int80_landing_pad; + long sym_vdso32_sigreturn_landing_pad; + long sym_vdso32_rt_sigreturn_landing_pad; +}; + +#ifdef CONFIG_X86_64 +extern const struct vdso_image vdso_image_64; +#endif + +#ifdef CONFIG_X86_X32_ABI +extern const struct vdso_image vdso_image_x32; +#endif + +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT +extern const struct vdso_image vdso_image_32; +#endif + +extern void __init init_vdso_image(const struct vdso_image *image); + +extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); + +extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); +#endif /* __ASSEMBLER__ */ + +#endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h new file mode 100644 index 000000000..136e5e57c --- /dev/null +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TSC, \ + VDSO_CLOCKMODE_PVCLOCK, \ + VDSO_CLOCKMODE_HVCLOCK + +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000..1936f21ed --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include +#include +#include + +#define __vdso_data (VVAR(_vdso_data)) +#define __timens_vdso_data (TIMENS(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. + */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TIMER +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return __timens_vdso_data; +} +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TIMER +static u64 vread_hvclock(void) +{ + return hv_read_tsc_page(&hvclock_page); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TIMER + if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h new file mode 100644 index 000000000..57b1a7034 --- /dev/null +++ b/arch/x86/include/asm/vdso/processor.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static __always_inline void rep_nop(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + +static __always_inline void cpu_relax(void) +{ + rep_nop(); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000..be199a9b2 --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h new file mode 100644 index 000000000..75884d2cd --- /dev/null +++ b/arch/x86/include/asm/vermagic.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#else +# define MODULE_ARCH_VERMAGIC "" +#endif + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h new file mode 100644 index 000000000..46f9b2dea --- /dev/null +++ b/arch/x86/include/asm/vga.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ + +#ifndef _ASM_X86_VGA_H +#define _ASM_X86_VGA_H + +#include + +/* + * On the PC, we can just recalculate addresses and then + * access the videoram directly without any black magic. + * To support memory encryption however, we need to access + * the videoram as decrypted memory. + */ + +#define VGA_MAP_MEM(x, s) \ +({ \ + unsigned long start = (unsigned long)phys_to_virt(x); \ + \ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) \ + set_memory_decrypted(start, (s) >> PAGE_SHIFT); \ + \ + start; \ +}) + +#define vga_readb(x) (*(x)) +#define vga_writeb(x, y) (*(y) = (x)) + +#endif /* _ASM_X86_VGA_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h new file mode 100644 index 000000000..7aa38b2ad --- /dev/null +++ b/arch/x86/include/asm/vgtod.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VGTOD_H +#define _ASM_X86_VGTOD_H + +/* + * This check is required to prevent ARCH=um to include + * unwanted headers. + */ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY +#include +#include +#include +#include + +#include + +#ifdef BUILD_VDSO32_64 +typedef u64 gtod_long_t; +#else +typedef unsigned long gtod_long_t; +#endif +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ + +#endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 000000000..6c2e3ff3c --- /dev/null +++ b/arch/x86/include/asm/virtext.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* CPU virtualization extensions handling + * + * This should carry the code for handling CPU virtualization extensions + * that needs to live in the kernel core. + * + * Author: Eduardo Habkost + * + * Copyright (C) 2008, Red Hat Inc. + * + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. + */ +#ifndef _ASM_X86_VIRTEX_H +#define _ASM_X86_VIRTEX_H + +#include + +#include +#include +#include + +/* + * VMX functions: + */ + +static inline int cpu_has_vmx(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + + +/** + * cpu_vmxoff() - Disable VMX on the current CPU + * + * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) + * + * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to + * atomically track post-VMXON state, e.g. this may be called in NMI context. + * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. + * faults are guaranteed to be due to the !post-VMXON check unless the CPU is + * magically in RM, VM86, compat mode, or at CPL>0. + */ +static inline int cpu_vmxoff(void) +{ + asm_volatile_goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); + + cr4_clear_bits(X86_CR4_VMXE); + return 0; + +fault: + cr4_clear_bits(X86_CR4_VMXE); + return -EIO; +} + +static inline int cpu_vmx_enabled(void) +{ + return __read_cr4() & X86_CR4_VMXE; +} + +/** Disable VMX if it is enabled on the current CPU + * + * You shouldn't call this if cpu_has_vmx() returns 0. + */ +static inline void __cpu_emergency_vmxoff(void) +{ + if (cpu_vmx_enabled()) + cpu_vmxoff(); +} + +/** Disable VMX if it is supported and enabled on the current CPU + */ +static inline void cpu_emergency_vmxoff(void) +{ + if (cpu_has_vmx()) + __cpu_emergency_vmxoff(); +} + + + + +/* + * SVM functions: + */ + +/** Check if the CPU has SVM support + * + * You can use the 'msg' arg to get a message describing the problem, + * if the function returns zero. Simply pass NULL if you are not interested + * on the messages; gcc should take care of not generating code for + * the messages on this case. + */ +static inline int cpu_has_svm(const char **msg) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) { + if (msg) + *msg = "not amd or hygon"; + return 0; + } + + if (!boot_cpu_has(X86_FEATURE_SVM)) { + if (msg) + *msg = "svm not available"; + return 0; + } + return 1; +} + + +/** Disable SVM on the current CPU + * + * You should call this only if cpu_has_svm() returned true. + */ +static inline void cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) { + /* + * Force GIF=1 prior to disabling SVM to ensure INIT and NMI + * aren't blocked, e.g. if a fatal error occurred between CLGI + * and STGI. Note, STGI may #UD if SVM is disabled from NMI + * context between reading EFER and executing STGI. In that + * case, GIF must already be set, otherwise the NMI would have + * been blocked, so just eat the fault. + */ + asm_volatile_goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); +fault: + wrmsrl(MSR_EFER, efer & ~EFER_SVME); + } +} + +/** Makes sure SVM is disabled, if it is supported on the CPU + */ +static inline void cpu_emergency_svm_disable(void) +{ + if (cpu_has_svm(NULL)) + cpu_svm_disable(); +} + +#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h new file mode 100644 index 000000000..9e8ac5073 --- /dev/null +++ b/arch/x86/include/asm/vm86.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VM86_H +#define _ASM_X86_VM86_H + +#include +#include + +/* + * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 + * mode - the main change is that the old segment descriptors aren't + * useful any more and are forced to be zero by the kernel (and the + * hardware when a trap occurs), and the real segment descriptors are + * at the end of the structure. Look at ptrace.h to see the "normal" + * setup. For user space layout see 'struct vm86_regs' above. + */ + +struct kernel_vm86_regs { +/* + * normal regs, with special meaning for the segment descriptors.. + */ + struct pt_regs pt; +/* + * these are specific to v86 mode: + */ + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; +}; + +struct vm86 { + struct vm86plus_struct __user *user_vm86; + struct pt_regs regs32; + unsigned long veflags; + unsigned long veflags_mask; + unsigned long saved_sp0; + + unsigned long flags; + unsigned long cpu_type; + struct revectored_struct int_revectored; + struct revectored_struct int21_revectored; + struct vm86plus_info_struct vm86plus; +}; + +#ifdef CONFIG_VM86 + +void handle_vm86_fault(struct kernel_vm86_regs *, long); +int handle_vm86_trap(struct kernel_vm86_regs *, long, int); +void save_v86_state(struct kernel_vm86_regs *, int); + +struct task_struct; + +#define free_vm86(t) do { \ + struct thread_struct *__t = (t); \ + if (__t->vm86 != NULL) { \ + kfree(__t->vm86); \ + __t->vm86 = NULL; \ + } \ +} while (0) + +/* + * Support for VM86 programs to request interrupts for + * real mode hardware drivers: + */ +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 + +static inline int invalid_vm86_irq(int irq) +{ + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; +} + +void release_vm86_irqs(struct task_struct *); + +#else + +#define handle_vm86_fault(a, b) +#define release_vm86_irqs(a) + +static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) +{ + return 0; +} + +static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } + +#define free_vm86(t) do { } while(0) + +#endif /* CONFIG_VM86 */ + +#endif /* _ASM_X86_VM86_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000..49ce331f3 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifdef CONFIG_X86_64 +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_GBPAGES); +} +#endif + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_PSE); +} + +#endif + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000..ac9fc51e2 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + +#include +#include +#include + +/* + * The hypercall definitions differ in the low word of the %edx argument + * in the following way: the old port base interface uses the port + * number to distinguish between high- and low bandwidth versions. + * + * The new vmcall interface instead uses a set of flags to select + * bandwidth mode and transfer direction. The flags should be loaded + * into %dx by any user and are automatically replaced by the port + * number if the VMWARE_HYPERVISOR_PORT method is used. + * + * In short, new driver code should strictly use the new definition of + * %dx content. + */ + +/* Old port-based version */ +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB 0x5659 + +/* Current vmcall / vmmcall version */ +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) + +/* The low bandwidth call. The low word of edx is presumed clear. */ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth out call. The low word of edx is presumed to have the + * HB and OUT bits set. + */ +#define VMWARE_HYPERCALL_HB_OUT \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep outsb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth in call. The low word of edx is presumed to have the + * HB bit set. + */ +#define VMWARE_HYPERCALL_HB_IN \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep insb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h new file mode 100644 index 000000000..498dc600b --- /dev/null +++ b/arch/x86/include/asm/vmx.h @@ -0,0 +1,630 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * vmx.h: VMX Architecture related definitions + * Copyright (c) 2004, Intel Corporation. + * + * A few random additions are: + * Copyright (C) 2006 Qumranet + * Avi Kivity + * Yaniv Kamay + */ +#ifndef VMX_H +#define VMX_H + + +#include +#include +#include +#include + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) +#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING) + +/* + * Definitions of Tertiary Processor-Based VM-Execution Controls. + */ +#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT) + +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_EXIT_CLEAR_BNDCFGS 0x00800000 +#define VM_EXIT_PT_CONCEAL_PIP 0x01000000 +#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VM_ENTRY_LOAD_BNDCFGS 0x00010000 +#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 +#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 +#define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 +#define VMX_MISC_ZERO_LEN_INS 0x40000000 +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + +/* VMFUNC functions */ +#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) +#define VMFUNC_EPTP_ENTRIES 512 + +static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) +{ + return vmx_basic & GENMASK_ULL(30, 0); +} + +static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; +} + +static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) +{ + return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; +} + +static inline int vmx_misc_cr3_count(u64 vmx_misc) +{ + return (vmx_misc & GENMASK_ULL(24, 16)) >> 16; +} + +static inline int vmx_misc_max_msr(u64 vmx_misc) +{ + return (vmx_misc & GENMASK_ULL(27, 25)) >> 25; +} + +static inline int vmx_misc_mseg_revid(u64 vmx_misc) +{ + return (vmx_misc & GENMASK_ULL(63, 32)) >> 32; +} + +/* VMCS Encodings */ +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + LAST_PID_POINTER_INDEX = 0x00000008, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CONTROL = 0x00002018, + VM_FUNCTION_CONTROL_HIGH = 0x00002019, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + ENCLS_EXITING_BITMAP = 0x0000202E, + ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + TERTIARY_VM_EXEC_CONTROL = 0x00002034, + TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035, + PID_POINTER_TABLE = 0x00002042, + PID_POINTER_TABLE_HIGH = 0x00002043, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + GUEST_IA32_RTIT_CTL = 0x00002814, + GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + NOTIFY_WINDOW = 0x00004024, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0x00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +/* + * Interruption-information format + */ +#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ +#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ +#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ +#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ +#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 + +#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK +#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK +#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK +#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK + +#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ +#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ +#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ +#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ +#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ + +/* GUEST_INTERRUPTIBILITY_INFO flags. */ +#define GUEST_INTR_STATE_STI 0x00000001 +#define GUEST_INTR_STATE_MOV_SS 0x00000002 +#define GUEST_INTR_STATE_SMI 0x00000004 +#define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 + +/* GUEST_ACTIVITY_STATE flags */ +#define GUEST_ACTIVITY_ACTIVE 0 +#define GUEST_ACTIVITY_HLT 1 +#define GUEST_ACTIVITY_SHUTDOWN 2 +#define GUEST_ACTIVITY_WAIT_SIPI 3 + +/* + * Exit Qualifications for MOV for Control Register Access + */ +#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/ +#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ +#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */ +#define LMSW_SOURCE_DATA_SHIFT 16 +#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ +#define REG_EAX (0 << 8) +#define REG_ECX (1 << 8) +#define REG_EDX (2 << 8) +#define REG_EBX (3 << 8) +#define REG_ESP (4 << 8) +#define REG_EBP (5 << 8) +#define REG_ESI (6 << 8) +#define REG_EDI (7 << 8) +#define REG_R8 (8 << 8) +#define REG_R9 (9 << 8) +#define REG_R10 (10 << 8) +#define REG_R11 (11 << 8) +#define REG_R12 (12 << 8) +#define REG_R13 (13 << 8) +#define REG_R14 (14 << 8) +#define REG_R15 (15 << 8) + +/* + * Exit Qualifications for MOV for Debug Register Access + */ +#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */ +#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ +#define TYPE_MOV_TO_DR (0 << 4) +#define TYPE_MOV_FROM_DR (1 << 4) +#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ + + +/* + * Exit Qualifications for APIC-Access + */ +#define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */ +#define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */ +#define TYPE_LINEAR_APIC_INST_READ (0 << 12) +#define TYPE_LINEAR_APIC_INST_WRITE (1 << 12) +#define TYPE_LINEAR_APIC_INST_FETCH (2 << 12) +#define TYPE_LINEAR_APIC_EVENT (3 << 12) +#define TYPE_PHYSICAL_APIC_EVENT (10 << 12) +#define TYPE_PHYSICAL_APIC_INST (15 << 12) + +/* segment AR in VMCS -- these are different from what LAR reports */ +#define VMX_SEGMENT_AR_L_MASK (1 << 13) + +#define VMX_AR_TYPE_ACCESSES_MASK 1 +#define VMX_AR_TYPE_READABLE_MASK (1 << 1) +#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2) +#define VMX_AR_TYPE_CODE_MASK (1 << 3) +#define VMX_AR_TYPE_MASK 0x0f +#define VMX_AR_TYPE_BUSY_64_TSS 11 +#define VMX_AR_TYPE_BUSY_32_TSS 11 +#define VMX_AR_TYPE_BUSY_16_TSS 3 +#define VMX_AR_TYPE_LDT 2 + +#define VMX_AR_UNUSABLE_MASK (1 << 16) +#define VMX_AR_S_MASK (1 << 4) +#define VMX_AR_P_MASK (1 << 7) +#define VMX_AR_L_MASK (1 << 13) +#define VMX_AR_DB_MASK (1 << 14) +#define VMX_AR_G_MASK (1 << 15) +#define VMX_AR_DPL_SHIFT 5 +#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3) + +#define VMX_AR_RESERVD_MASK 0xfffe0f00 + +#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2) + +#define VMX_NR_VPIDS (1 << 16) +#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0 +#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 +#define VMX_VPID_EXTENT_ALL_CONTEXT 2 +#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3 + +#define VMX_EPT_EXTENT_CONTEXT 1 +#define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_SHIFT 24 + +#define VMX_EPT_EXECUTE_ONLY_BIT (1ull) +#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) +#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) +#define VMX_EPTP_UC_BIT (1ull << 8) +#define VMX_EPTP_WB_BIT (1ull << 14) +#define VMX_EPT_2MB_PAGE_BIT (1ull << 16) +#define VMX_EPT_1GB_PAGE_BIT (1ull << 17) +#define VMX_EPT_INVEPT_BIT (1ull << 20) +#define VMX_EPT_AD_BIT (1ull << 21) +#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) +#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) + +#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ +#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */ +#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ +#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ +#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ + +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPTP_PWL_MASK 0x38ull +#define VMX_EPTP_PWL_4 0x18ull +#define VMX_EPTP_PWL_5 0x20ull +#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +#define VMX_EPTP_MT_MASK 0x7ull +#define VMX_EPTP_MT_WB 0x6ull +#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IPAT_BIT (1ull << 6) +#define VMX_EPT_ACCESS_BIT (1ull << 8) +#define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ + VMX_EPT_WRITABLE_MASK | \ + VMX_EPT_EXECUTABLE_MASK) +#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) + +static inline u8 vmx_eptp_page_walk_level(u64 eptp) +{ + u64 encoded_level = eptp & VMX_EPTP_PWL_MASK; + + if (encoded_level == VMX_EPTP_PWL_5) + return 5; + + /* @eptp must be pre-validated by the caller. */ + WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4); + return 4; +} + +/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ +#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ + VMX_EPT_EXECUTABLE_MASK) + +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + +struct vmx_msr_entry { + u32 index; + u32 reserved; + u64 value; +} __aligned(16); + +/* + * Exit Qualifications for entry failure during or after loading guest state + */ +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; + +/* + * Exit Qualifications for EPT Violations + */ +#define EPT_VIOLATION_ACC_READ_BIT 0 +#define EPT_VIOLATION_ACC_WRITE_BIT 1 +#define EPT_VIOLATION_ACC_INSTR_BIT 2 +#define EPT_VIOLATION_RWX_SHIFT 3 +#define EPT_VIOLATION_GVA_IS_VALID_BIT 7 +#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8 +#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) +#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) +#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) +#define EPT_VIOLATION_RWX_MASK (VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT) +#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT) +#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) + +/* + * Exit Qualifications for NOTIFY VM EXIT + */ +#define NOTIFY_VM_CONTEXT_INVALID BIT(0) + +/* + * VM-instruction error numbers + */ +enum vm_instruction_error_number { + VMXERR_VMCALL_IN_VMX_ROOT_OPERATION = 1, + VMXERR_VMCLEAR_INVALID_ADDRESS = 2, + VMXERR_VMCLEAR_VMXON_POINTER = 3, + VMXERR_VMLAUNCH_NONCLEAR_VMCS = 4, + VMXERR_VMRESUME_NONLAUNCHED_VMCS = 5, + VMXERR_VMRESUME_AFTER_VMXOFF = 6, + VMXERR_ENTRY_INVALID_CONTROL_FIELD = 7, + VMXERR_ENTRY_INVALID_HOST_STATE_FIELD = 8, + VMXERR_VMPTRLD_INVALID_ADDRESS = 9, + VMXERR_VMPTRLD_VMXON_POINTER = 10, + VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID = 11, + VMXERR_UNSUPPORTED_VMCS_COMPONENT = 12, + VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT = 13, + VMXERR_VMXON_IN_VMX_ROOT_OPERATION = 15, + VMXERR_ENTRY_INVALID_EXECUTIVE_VMCS_POINTER = 16, + VMXERR_ENTRY_NONLAUNCHED_EXECUTIVE_VMCS = 17, + VMXERR_ENTRY_EXECUTIVE_VMCS_POINTER_NOT_VMXON_POINTER = 18, + VMXERR_VMCALL_NONCLEAR_VMCS = 19, + VMXERR_VMCALL_INVALID_VM_EXIT_CONTROL_FIELDS = 20, + VMXERR_VMCALL_INCORRECT_MSEG_REVISION_ID = 22, + VMXERR_VMXOFF_UNDER_DUAL_MONITOR_TREATMENT_OF_SMIS_AND_SMM = 23, + VMXERR_VMCALL_INVALID_SMM_MONITOR_FEATURES = 24, + VMXERR_ENTRY_INVALID_VM_EXECUTION_CONTROL_FIELDS_IN_EXECUTIVE_VMCS = 25, + VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS = 26, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, +}; + +/* + * VM-instruction errors that can be encountered on VM-Enter, used to trace + * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter + * from a SMI Transfer Monitor are not included as things have gone seriously + * sideways if we get one of those... + */ +#define VMX_VMENTER_INSTRUCTION_ERRORS \ + { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \ + { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \ + { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \ + { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \ + { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \ + { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" } + +enum vmx_l1d_flush_state { + VMENTER_L1D_FLUSH_AUTO, + VMENTER_L1D_FLUSH_NEVER, + VMENTER_L1D_FLUSH_COND, + VMENTER_L1D_FLUSH_ALWAYS, + VMENTER_L1D_FLUSH_EPT_DISABLED, + VMENTER_L1D_FLUSH_NOT_REQUIRED, +}; + +extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; + +#endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000..c6a7eed03 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 5 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ +#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ +#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ +#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ +#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ +#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ +#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ + +/* Tertiary Processor-Based VM-Execution Controls, word 3 */ +#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ +#endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h new file mode 100644 index 000000000..ab60a71a8 --- /dev/null +++ b/arch/x86/include/asm/vsyscall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VSYSCALL_H +#define _ASM_X86_VSYSCALL_H + +#include +#include + +#ifdef CONFIG_X86_VSYSCALL_EMULATION +extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); + +/* + * Called on instruction fetch fault in vsyscall page. + * Returns true if handled. + */ +extern bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address); +#else +static inline void map_vsyscall(void) {} +static inline bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) +{ + return false; +} +#endif + +#endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h new file mode 100644 index 000000000..183e98e49 --- /dev/null +++ b/arch/x86/include/asm/vvar.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * vvar.h: Shared vDSO/kernel variable declarations + * Copyright (c) 2011 Andy Lutomirski + * + * A handful of variables are accessible (read-only) from userspace + * code in the vsyscall page and the vdso. They are declared here. + * Some other file must define them with DEFINE_VVAR. + * + * In normal kernel code, they are used like any other variable. + * In user code, they are accessed through the VVAR macro. + * + * These variables live in a page of kernel data that has an extra RO + * mapping for userspace. Each variable needs a unique offset within + * that page; specify that offset with the DECLARE_VVAR macro. (If + * you mess up, the linker will catch it.) + */ + +#ifndef _ASM_X86_VVAR_H +#define _ASM_X86_VVAR_H + +#ifdef EMIT_VVAR +/* + * EMIT_VVAR() is used by the kernel linker script to put vvars in the + * right place. Also, it's used by kernel code to import offsets values. + */ +#define DECLARE_VVAR(offset, type, name) \ + EMIT_VVAR(name, offset) + +#else + +extern char __vvar_page; + +#define DECLARE_VVAR(offset, type, name) \ + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ + extern type timens_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ + +#define VVAR(name) (vvar_ ## name) +#define TIMENS(name) (timens_ ## name) + +#define DEFINE_VVAR(type, name) \ + type name[CS_BASES] \ + __attribute__((section(".vvar_" #name), aligned(16))) __visible + +#endif + +/* DECLARE_VVAR(offset, type, name) */ + +DECLARE_VVAR(128, struct vdso_data, _vdso_data) + +#undef DECLARE_VVAR + +#endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 000000000..46b4f1f7f --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +#include + +/* + * This is largely generic for little-endian machines, but the + * optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) +{ + return count_masked_bytes(mask); +} + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret; + + asm volatile( + "1: mov %[mem], %[ret]\n" + "2:\n" + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD) + : [ret] "=r" (ret) + : [mem] "m" (*(unsigned long *)addr)); + + return ret; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h new file mode 100644 index 000000000..034e62838 --- /dev/null +++ b/arch/x86/include/asm/x86_init.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PLATFORM_H +#define _ASM_X86_PLATFORM_H + +#include + +struct ghcb; +struct mpc_bus; +struct mpc_cpu; +struct pt_regs; +struct mpc_table; +struct cpuinfo_x86; +struct irq_domain; + +/** + * struct x86_init_mpparse - platform specific mpparse ops + * @setup_ioapic_ids: platform specific ioapic id override + * @find_smp_config: find the smp configuration + * @get_smp_config: get the smp configuration + */ +struct x86_init_mpparse { + void (*setup_ioapic_ids)(void); + void (*find_smp_config)(void); + void (*get_smp_config)(unsigned int early); +}; + +/** + * struct x86_init_resources - platform specific resource related ops + * @probe_roms: probe BIOS roms + * @reserve_resources: reserve the standard resources for the + * platform + * @memory_setup: platform specific memory setup + * + */ +struct x86_init_resources { + void (*probe_roms)(void); + void (*reserve_resources)(void); + char *(*memory_setup)(void); +}; + +/** + * struct x86_init_irqs - platform specific interrupt setup + * @pre_vector_init: init code to run before interrupt vectors + * are set up. + * @intr_init: interrupt init code + * @intr_mode_select: interrupt delivery mode selection + * @intr_mode_init: interrupt delivery mode setup + * @create_pci_msi_domain: Create the PCI/MSI interrupt domain + */ +struct x86_init_irqs { + void (*pre_vector_init)(void); + void (*intr_init)(void); + void (*intr_mode_select)(void); + void (*intr_mode_init)(void); + struct irq_domain *(*create_pci_msi_domain)(void); +}; + +/** + * struct x86_init_oem - oem platform specific customizing functions + * @arch_setup: platform specific architecture setup + * @banner: print a platform specific banner + */ +struct x86_init_oem { + void (*arch_setup)(void); + void (*banner)(void); +}; + +/** + * struct x86_init_paging - platform specific paging functions + * @pagetable_init: platform specific paging initialization call to setup + * the kernel pagetables and prepare accessors functions. + * Callback must call paging_init(). Called once after the + * direct mapping for phys memory is available. + */ +struct x86_init_paging { + void (*pagetable_init)(void); +}; + +/** + * struct x86_init_timers - platform specific timer setup + * @setup_perpcu_clockev: set up the per cpu clock event device for the + * boot cpu + * @timer_init: initialize the platform timer (default PIT/HPET) + * @wallclock_init: init the wallclock device + */ +struct x86_init_timers { + void (*setup_percpu_clockev)(void); + void (*timer_init)(void); + void (*wallclock_init)(void); +}; + +/** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** + * struct x86_init_pci - platform specific pci init functions + * @arch_init: platform specific pci arch init call + * @init: platform specific pci subsystem init + * @init_irq: platform specific pci irq init + * @fixup_irqs: platform specific pci irq fixup + */ +struct x86_init_pci { + int (*arch_init)(void); + int (*init)(void); + void (*init_irq)(void); + void (*fixup_irqs)(void); +}; + +/** + * struct x86_hyper_init - x86 hypervisor init functions + * @init_platform: platform setup + * @guest_late_init: guest late init + * @x2apic_available: X2APIC detection + * @msi_ext_dest_id: MSI supports 15-bit APIC IDs + * @init_mem_mapping: setup early mappings during init_mem_mapping() + * @init_after_bootmem: guest init after boot allocator is finished + */ +struct x86_hyper_init { + void (*init_platform)(void); + void (*guest_late_init)(void); + bool (*x2apic_available)(void); + bool (*msi_ext_dest_id)(void); + void (*init_mem_mapping)(void); + void (*init_after_bootmem)(void); +}; + +/** + * struct x86_init_acpi - x86 ACPI init functions + * @set_root_poitner: set RSDP address + * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init + */ +struct x86_init_acpi { + void (*set_root_pointer)(u64 addr); + u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); +}; + +/** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + +/** + * struct x86_init_ops - functions for platform specific setup + * + */ +struct x86_init_ops { + struct x86_init_resources resources; + struct x86_init_mpparse mpparse; + struct x86_init_irqs irqs; + struct x86_init_oem oem; + struct x86_init_paging paging; + struct x86_init_timers timers; + struct x86_init_iommu iommu; + struct x86_init_pci pci; + struct x86_hyper_init hyper; + struct x86_init_acpi acpi; +}; + +/** + * struct x86_cpuinit_ops - platform specific cpu hotplug setups + * @setup_percpu_clockev: set up the per cpu clock event device + * @early_percpu_clock_init: early init of the per cpu clock event device + */ +struct x86_cpuinit_ops { + void (*setup_percpu_clockev)(void); + void (*early_percpu_clock_init)(void); + void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); +}; + +struct timespec64; + +/** + * struct x86_legacy_devices - legacy x86 devices + * + * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform + * is known to never have a PNPBIOS. + * + * These are devices known to require LPC or ISA bus. The definition of legacy + * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag + * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on + * the LPC or ISA bus. User visible devices are devices that have end-user + * accessible connectors (for example, LPT parallel port). Legacy devices on + * the LPC bus consist for example of serial and parallel ports, PS/2 keyboard + * / mouse, and the floppy disk controller. A system that lacks all known + * legacy devices can assume all devices can be detected exclusively via + * standard device enumeration mechanisms including the ACPI namespace. + * + * A system which has does not have ACPI_FADT_LEGACY_DEVICES enabled must not + * have any of the legacy devices enumerated below present. + */ +struct x86_legacy_devices { + int pnpbios; +}; + +/** + * enum x86_legacy_i8042_state - i8042 keyboard controller state + * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on + * given platform/subarch. + * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller + * is absent. + * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be + * present, the i8042 driver should probe for controller existence. + */ +enum x86_legacy_i8042_state { + X86_LEGACY_I8042_PLATFORM_ABSENT, + X86_LEGACY_I8042_FIRMWARE_ABSENT, + X86_LEGACY_I8042_EXPECTED_PRESENT, +}; + +/** + * struct x86_legacy_features - legacy x86 features + * + * @i8042: indicated if we expect the device to have i8042 controller + * present. + * @rtc: this device has a CMOS real-time clock present + * @reserve_bios_regions: boot code will search for the EBDA address and the + * start of the 640k - 1M BIOS region. If false, the platform must + * ensure that its memory map correctly reserves sub-1MB regions as needed. + * @devices: legacy x86 devices, refer to struct x86_legacy_devices + * documentation for further details. + */ +struct x86_legacy_features { + enum x86_legacy_i8042_state i8042; + int rtc; + int warm_reset; + int no_vga; + int reserve_bios_regions; + struct x86_legacy_devices devices; +}; + +/** + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks + * + * @pin_vcpu: pin current vcpu to specified physical + * cpu (run rarely) + * @sev_es_hcall_prepare: Load additional hypervisor-specific + * state into the GHCB when doing a VMMCALL under + * SEV-ES. Called from the #VC exception handler. + * @sev_es_hcall_finish: Copies state from the GHCB back into the + * processor (or pt_regs). Also runs checks on the + * state returned from the hypervisor after a + * VMMCALL under SEV-ES. Needs to return 'false' + * if the checks fail. Called from the #VC + * exception handler. + */ +struct x86_hyper_runtime { + void (*pin_vcpu)(int cpu); + void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); +}; + +/** + * struct x86_platform_ops - platform specific runtime functions + * @calibrate_cpu: calibrate CPU + * @calibrate_tsc: calibrate TSC, if different from CPU + * @get_wallclock: get time from HW clock like RTC etc. + * @set_wallclock: set time back to HW clock + * @is_untracked_pat_range exclude from PAT logic + * @nmi_init enable NMI on cpus + * @save_sched_clock_state: save state for sched_clock() on suspend + * @restore_sched_clock_state: restore state for sched_clock() on resume + * @apic_post_init: adjust apic if needed + * @legacy: legacy features + * @set_legacy_features: override legacy features. Use of this callback + * is highly discouraged. You should only need + * this if your hardware platform requires further + * custom fine tuning far beyond what may be + * possible in x86_early_init_platform_quirks() by + * only using the current x86_hardware_subarch + * semantics. + * @realmode_reserve: reserve memory for realmode trampoline + * @realmode_init: initialize realmode trampoline + * @hyper: x86 hypervisor specific runtime callbacks + */ +struct x86_platform_ops { + unsigned long (*calibrate_cpu)(void); + unsigned long (*calibrate_tsc)(void); + void (*get_wallclock)(struct timespec64 *ts); + int (*set_wallclock)(const struct timespec64 *ts); + void (*iommu_shutdown)(void); + bool (*is_untracked_pat_range)(u64 start, u64 end); + void (*nmi_init)(void); + unsigned char (*get_nmi_reason)(void); + void (*save_sched_clock_state)(void); + void (*restore_sched_clock_state)(void); + void (*apic_post_init)(void); + struct x86_legacy_features legacy; + void (*set_legacy_features)(void); + void (*realmode_reserve)(void); + void (*realmode_init)(void); + struct x86_hyper_runtime hyper; + struct x86_guest guest; +}; + +struct x86_apic_ops { + unsigned int (*io_apic_read) (unsigned int apic, unsigned int reg); + void (*restore)(void); +}; + +extern struct x86_init_ops x86_init; +extern struct x86_cpuinit_ops x86_cpuinit; +extern struct x86_platform_ops x86_platform; +extern struct x86_msi_ops x86_msi; +extern struct x86_apic_ops x86_apic_ops; + +extern void x86_early_init_platform_quirks(void); +extern void x86_init_noop(void); +extern void x86_init_uint_noop(unsigned int unused); +extern bool bool_x86_init_noop(void); +extern void x86_op_int_noop(int cpu); +extern bool x86_pnpbios_disabled(void); + +#endif diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h new file mode 100644 index 000000000..6daa9b0c8 --- /dev/null +++ b/arch/x86/include/asm/xen/cpuid.h @@ -0,0 +1,125 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* + * For compatibility with other hypervisor interfaces, the Xen cpuid leaves + * can be found at the first otherwise unused 0x100 aligned boundary starting + * from 0x40000000. + * + * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid + * leaves will start at 0x40000100 + */ + +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000x00) + * EAX: Largest Xen-information leaf. All leaves up to an including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000x01) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000x02) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 4 (0x40000x03) + * Sub-leaf 0: EAX: bit 0: emulated tsc + * bit 1: host tsc is known to be reliable + * bit 2: RDTSCP instruction available + * EBX: tsc_mode: 0=default (emulate if necessary), 1=emulate, + * 2=no emulation, 3=no emulation + TSC_AUX support + * ECX: guest tsc frequency in kHz + * EDX: guest tsc incarnation (migration count) + * Sub-leaf 1: EAX: tsc offset low part + * EBX: tsc offset high part + * ECX: multiplicator for tsc->ns conversion + * EDX: shift amount for tsc->ns conversion + * Sub-leaf 2: EAX: host tsc frequency in kHz + */ + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + * Sub-leaf 0: EAX: Features + * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) + */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */ +/* + * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be + * used to store high bits for the Destination ID. This expands the Destination + * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768. + */ +#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) +/* Per-vCPU event channel upcalls */ +#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6) + +/* + * Leaf 6 (0x40000x05) + * PV-specific parameters + * Sub-leaf 0: EAX: max available sub-leaf + * Sub-leaf 0: EBX: bits 0-7: max machine address width + */ + +/* Max. address width in bits taking memory hotplug into account. */ +#define XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK (0xffu << 0) + +#define XEN_CPUID_MAX_NUM_LEAVES 5 + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h new file mode 100644 index 000000000..62bdceb59 --- /dev/null +++ b/arch/x86/include/asm/xen/events.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_XEN_EVENTS_H +#define _ASM_X86_XEN_EVENTS_H + +#include + +enum ipi_vector { + XEN_RESCHEDULE_VECTOR, + XEN_CALL_FUNCTION_VECTOR, + XEN_CALL_FUNCTION_SINGLE_VECTOR, + XEN_SPIN_UNLOCK_VECTOR, + XEN_IRQ_WORK_VECTOR, + XEN_NMI_VECTOR, + + XEN_NR_IPIS, +}; + +static inline int xen_irqs_disabled(struct pt_regs *regs) +{ + return raw_irqs_disabled_flags(regs->flags); +} + +/* No need for a barrier -- XCHG is a barrier on x86. */ +#define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) + +extern bool xen_have_vector_callback; + +/* + * Events delivered via platform PCI interrupts are always + * routed to vcpu 0 and hence cannot be rebound. + */ +static inline bool xen_support_evtchn_rebind(void) +{ + return (!xen_hvm_domain() || xen_have_vector_callback); +} + +extern bool xen_percpu_upcall; +#endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h new file mode 100644 index 000000000..e5e0fe10c --- /dev/null +++ b/arch/x86/include/asm/xen/hypercall.h @@ -0,0 +1,504 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_X86_XEN_HYPERCALL_H +#define _ASM_X86_XEN_HYPERCALL_H + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +struct xen_dm_op_buf; + +/* + * The hypercall asms have to meet several constraints: + * - Work on 32- and 64-bit. + * The two architectures put their arguments in different sets of + * registers. + * + * - Work around asm syntax quirks + * It isn't possible to specify one of the rNN registers in a + * constraint, so we use explicit register variables to get the + * args into the right place. + * + * - Mark all registers as potentially clobbered + * Even unused parameters can be clobbered by the hypervisor, so we + * need to make sure gcc knows it. + * + * - Avoid compiler bugs. + * This is the tricky part. Because x86_32 has such a constrained + * register set, gcc versions below 4.3 have trouble generating + * code when all the arg registers and memory are trashed by the + * asm. There are syntactically simpler ways of achieving the + * semantics below, but they cause the compiler to crash. + * + * The only combination I found which works is: + * - assign the __argX variables first + * - list all actually used parameters as "+r" (__argX) + * - clobber the rest + * + * The result certainly isn't pretty, and it really shows up cpp's + * weakness as a macro language. Sorry. (But let's just give thanks + * there aren't more than 5 arguments...) + */ + +extern struct { char _entry[32]; } hypercall_page[]; + +#define __HYPERCALL "call hypercall_page+%c[offset]" +#define __HYPERCALL_ENTRY(x) \ + [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) + +#ifdef CONFIG_X86_32 +#define __HYPERCALL_RETREG "eax" +#define __HYPERCALL_ARG1REG "ebx" +#define __HYPERCALL_ARG2REG "ecx" +#define __HYPERCALL_ARG3REG "edx" +#define __HYPERCALL_ARG4REG "esi" +#define __HYPERCALL_ARG5REG "edi" +#else +#define __HYPERCALL_RETREG "rax" +#define __HYPERCALL_ARG1REG "rdi" +#define __HYPERCALL_ARG2REG "rsi" +#define __HYPERCALL_ARG3REG "rdx" +#define __HYPERCALL_ARG4REG "r10" +#define __HYPERCALL_ARG5REG "r8" +#endif + +#define __HYPERCALL_DECLS \ + register unsigned long __res asm(__HYPERCALL_RETREG); \ + register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \ + register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ + register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ + register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; + +#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT +#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) +#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) +#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) +#define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4) +#define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5) + +#define __HYPERCALL_0ARG() +#define __HYPERCALL_1ARG(a1) \ + __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1); +#define __HYPERCALL_2ARG(a1,a2) \ + __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2); +#define __HYPERCALL_3ARG(a1,a2,a3) \ + __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3); +#define __HYPERCALL_4ARG(a1,a2,a3,a4) \ + __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4); +#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \ + __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5); + +#define __HYPERCALL_CLOBBER5 "memory" +#define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG +#define __HYPERCALL_CLOBBER3 __HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG +#define __HYPERCALL_CLOBBER2 __HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG +#define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG +#define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG + +#define _hypercall0(type, name) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_0ARG(); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_0PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER0); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_1ARG(a1); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_1PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER1); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_2ARG(a1, a2); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_2PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER2); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_3ARG(a1, a2, a3); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_3PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER3); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_4ARG(a1, a2, a3, a4); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_4PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER4); \ + (type)__res; \ +}) + +static inline long +xen_single_call(unsigned int call, + unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5) +{ + __HYPERCALL_DECLS; + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + + asm volatile(CALL_NOSPEC + : __HYPERCALL_5PARAM + : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_CLOBBER5); + + return (long)__res; +} + +static __always_inline void __xen_stac(void) +{ + /* + * Suppress objtool seeing the STAC/CLAC and getting confused about it + * calling random code with AC=1. + */ + asm volatile(ANNOTATE_IGNORE_ALTERNATIVE + ASM_STAC ::: "memory", "flags"); +} + +static __always_inline void __xen_clac(void) +{ + asm volatile(ANNOTATE_IGNORE_ALTERNATIVE + ASM_CLAC ::: "memory", "flags"); +} + +static inline long +privcmd_call(unsigned int call, + unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5) +{ + long res; + + __xen_stac(); + res = xen_single_call(call, a1, a2, a3, a4, a5); + __xen_clac(); + + return res; +} + +#ifdef CONFIG_XEN_PV +static inline int +HYPERVISOR_set_trap_table(struct trap_info *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update(struct mmu_update *req, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_callback_op(int cmd, void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static __always_inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static __always_inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall2(int, update_descriptor, ma, desc); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; + + trace_xen_mc_entry(mcl, 1); +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + + trace_xen_mc_entry(mcl, 3); +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + + trace_xen_mc_entry(mcl, 2); +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; + + trace_xen_mc_entry(mcl, 2); +} +#endif + +static inline int +HYPERVISOR_sched_op(int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_mca(struct xen_mc *mc_op) +{ + mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; + return _hypercall1(int, mca, mc_op); +} + +static inline int +HYPERVISOR_platform_op(struct xen_platform_op *op) +{ + op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, op); +} + +static inline long +HYPERVISOR_memory_op(unsigned int cmd, void *arg) +{ + return _hypercall2(long, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_event_channel_op(int cmd, void *arg) +{ + return _hypercall2(int, event_channel_op, cmd, arg); +} + +static __always_inline int +HYPERVISOR_xen_version(int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io(int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + return _hypercall2(int, physdev_op, cmd, arg); +} + +static inline int +HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend(unsigned long start_info_mfn) +{ + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; + + /* + * For a PV guest the tools require that the start_info mfn be + * present in rdx/edx when the hypercall is made. Per the + * hypercall calling convention this is the third hypercall + * argument, which is start_info_mfn here. + */ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); +} + +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + +static inline int +HYPERVISOR_xenpmu_op(unsigned int op, void *arg) +{ + return _hypercall2(int, xenpmu_op, op, arg); +} + +static inline int +HYPERVISOR_dm_op( + domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) +{ + int ret; + __xen_stac(); + ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); + __xen_clac(); + return ret; +} + +#endif /* _ASM_X86_XEN_HYPERCALL_H */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h new file mode 100644 index 000000000..16f548a66 --- /dev/null +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -0,0 +1,64 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_X86_XEN_HYPERVISOR_H +#define _ASM_X86_XEN_HYPERVISOR_H + +extern struct shared_info *HYPERVISOR_shared_info; +extern struct start_info *xen_start_info; + +#include + +static inline uint32_t xen_cpuid_base(void) +{ + return hypervisor_cpuid_base("XenVMMXenVMM", 2); +} + +struct pci_dev; + +#ifdef CONFIG_XEN_PV_DOM0 +bool xen_initdom_restore_msi(struct pci_dev *dev); +#else +static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return true; } +#endif + +#ifdef CONFIG_HOTPLUG_CPU +void xen_arch_register_cpu(int num); +void xen_arch_unregister_cpu(int num); +#endif + +#ifdef CONFIG_PVH +void __init xen_pvh_init(struct boot_params *boot_params); +void __init mem_map_via_hcall(struct boot_params *boot_params_p); +#endif + +#endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h new file mode 100644 index 000000000..baca0b00e --- /dev/null +++ b/arch/x86/include/asm/xen/interface.h @@ -0,0 +1,390 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef _ASM_X86_XEN_INTERFACE_H +#define _ASM_X86_XEN_INTERFACE_H + +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. + * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but + * they might not be on other architectures. + */ +#ifdef __XEN__ +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#ifdef __XEN__ +#if defined(__i386__) +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while (0) +#elif defined(__x86_64__) +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#endif +#else +#if defined(__i386__) +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(uint64_t *)&(hnd) = 0; \ + (hnd) = val; \ + } while (0) +#elif defined(__x86_64__) +#define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0) +#endif +#endif + +#ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +typedef unsigned long xen_ulong_t; +#define PRI_xen_ulong "lx" +typedef long xen_long_t; +#define PRI_xen_long "lx" + +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(void); +DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); +#endif + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + * + * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* + * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: No one may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) + +#ifndef __ASSEMBLY__ +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +DEFINE_GUEST_HANDLE_STRUCT(trap_info); + +struct arch_shared_info { + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it + * has been set to invalid e.g. due to the p2m being too large for the + * 3-level p2m tree. In this case the linear mapper p2m list anchored + * at p2m_vaddr is to be used. + */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + /* + * Following three fields are valid if p2m_cr3 contains a value + * different from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register + * state and holds the folded machine frame number (via xen_pfn_to_cr3) + * of a L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All + * entries in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 + * and a value with the least significant bit set indicates that a + * mapping update is in progress. This allows guest external software + * (e.g. in Dom0) to verify that read mappings are consistent and + * whether they have changed since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an + * atomic write only. + */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ +#ifdef CONFIG_X86_32 + uint32_t wc_sec_hi; +#endif +}; +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +#include +#else +#include +#endif + +#include + +#ifndef __ASSEMBLY__ +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + * + * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise + * for HVM and PVH guests, not all information in this structure is updated: + * + * - For HVM guests, the structures read include: fpu_ctxt (if + * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] + * + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other fields not used should be set to 0. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; + unsigned long syscall_callback_eip; +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); + +/* AMD PMU registers and structures */ +struct xen_pmu_amd_ctxt { + /* + * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). + * For PV(H) guests these fields are RO. + */ + uint32_t counters; + uint32_t ctrls; + + /* Counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Intel PMU registers and structures */ +struct xen_pmu_cntr_pair { + uint64_t counter; + uint64_t control; +}; + +struct xen_pmu_intel_ctxt { + /* + * Offsets to fixed and architectural counter MSRs (relative to + * xen_pmu_arch.c.intel). + * For PV(H) guests these fields are RO. + */ + uint32_t fixed_counters; + uint32_t arch_counters; + + /* PMU registers */ + uint64_t global_ctrl; + uint64_t global_ovf_ctrl; + uint64_t global_status; + uint64_t fixed_ctrl; + uint64_t ds_area; + uint64_t pebs_enable; + uint64_t debugctl; + + /* Fixed and architectural counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Sampled domain's registers */ +struct xen_pmu_regs { + uint64_t ip; + uint64_t sp; + uint64_t flags; + uint16_t cs; + uint16_t ss; + uint8_t cpl; + uint8_t pad[3]; +}; + +/* PMU flags */ +#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ + +/* + * Architecture-specific information describing state of the processor at + * the time of PMU interrupt. + * Fields of this structure marked as RW for guest should only be written by + * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the + * hypervisor during PMU interrupt). Hypervisor will read updated data in + * XENPMU_flush hypercall and clear PMU_CACHED bit. + */ +struct xen_pmu_arch { + union { + /* + * Processor's registers at the time of interrupt. + * WO for hypervisor, RO for guests. + */ + struct xen_pmu_regs regs; + /* + * Padding for adding new registers to xen_pmu_regs in + * the future + */ +#define XENPMU_REGS_PAD_SZ 64 + uint8_t pad[XENPMU_REGS_PAD_SZ]; + } r; + + /* WO for hypervisor, RO for guest */ + uint64_t pmu_flags; + + /* + * APIC LVTPC register. + * RW for both hypervisor and guest. + * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware + * during XENPMU_flush or XENPMU_lvtpc_set. + */ + union { + uint32_t lapic_lvtpc; + uint64_t pad; + } l; + + /* + * Vendor-specific PMU registers. + * RW for both hypervisor and guest (see exceptions above). + * Guest's updates to this field are verified and then loaded by the + * hypervisor into hardware during XENPMU_flush + */ + union { + struct xen_pmu_amd_ctxt amd; + struct xen_pmu_intel_ctxt intel; + + /* + * Padding for contexts (fixed parts only, does not include + * MSR banks that are specified by offsets) + */ +#define XENPMU_CTXT_PAD_SZ 128 + uint8_t pad[XENPMU_CTXT_PAD_SZ]; + } c; +}; + +#endif /* !__ASSEMBLY__ */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#include + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) + +#endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h new file mode 100644 index 000000000..dc40578ab --- /dev/null +++ b/arch/x86/include/asm/xen/interface_32.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef _ASM_X86_XEN_INTERFACE_32_H +#define _ASM_X86_XEN_INTERFACE_32_H + + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* And the trap vector is... */ +#define TRAP_INSTR "int $0x82" + +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define __HYPERVISOR_VIRT_START 0xF5800000 + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ +}; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#define XEN_CALLBACK(__cs, __eip) \ + ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) +#endif /* !__ASSEMBLY__ */ + + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +#endif /* _ASM_X86_XEN_INTERFACE_32_H */ diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h new file mode 100644 index 000000000..c599ec269 --- /dev/null +++ b/arch/x86/include/asm/xen/interface_64.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_XEN_INTERFACE_64_H +#define _ASM_X86_XEN_INTERFACE_64_H + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 +#define __MACH2PHYS_SHIFT 3 + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +typedef unsigned long xen_callback_t; + +#define XEN_CALLBACK(__cs, __rip) \ + ((unsigned long)(__rip)) + +#endif /* !__ASSEMBLY__ */ + + +#endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h new file mode 100644 index 000000000..fa9ec2078 --- /dev/null +++ b/arch/x86/include/asm/xen/page.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_XEN_PAGE_H +#define _ASM_X86_XEN_PAGE_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +/* Xen machine address */ +typedef struct xmaddr { + phys_addr_t maddr; +} xmaddr_t; + +/* Xen pseudo-physical address */ +typedef struct xpaddr { + phys_addr_t paddr; +} xpaddr_t; + +#ifdef CONFIG_X86_64 +#define XEN_PHYSICAL_MASK __sme_clr((1UL << 52) - 1) +#else +#define XEN_PHYSICAL_MASK __PHYSICAL_MASK +#endif + +#define XEN_PTE_MFN_MASK ((pteval_t)(((signed long)PAGE_MASK) & \ + XEN_PHYSICAL_MASK)) + +#define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) +#define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) +#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) +#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) + +#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) + +extern unsigned long *machine_to_phys_mapping; +extern unsigned long machine_to_phys_nr; +extern unsigned long *xen_p2m_addr; +extern unsigned long xen_p2m_size; +extern unsigned long xen_max_p2m_pfn; + +extern int xen_alloc_p2m_entry(unsigned long pfn); + +extern unsigned long get_phys_to_machine(unsigned long pfn); +extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); + +#ifdef CONFIG_XEN_PV +extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count); +extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, + struct page **pages, unsigned int count); +#else +static inline int +set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) +{ + return 0; +} + +static inline int +clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, + struct page **pages, unsigned int count) +{ + return 0; +} +#endif + +/* + * Helper functions to write or read unsigned long values to/from + * memory, when the access may fault. + */ +static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) +{ + int ret = 0; + + asm volatile("1: mov %[val], %[ptr]\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) + : [ret] "+r" (ret), [ptr] "=m" (*addr) + : [val] "r" (val)); + + return ret; +} + +static inline int xen_safe_read_ulong(const unsigned long *addr, + unsigned long *val) +{ + unsigned long rval = ~0ul; + int ret = 0; + + asm volatile("1: mov %[ptr], %[rval]\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) + : [ret] "+r" (ret), [rval] "+r" (rval) + : [ptr] "m" (*addr)); + *val = rval; + + return ret; +} + +#ifdef CONFIG_XEN_PV +/* + * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): + * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator + * bits (identity or foreign) are set. + * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set + * identity or foreign indicator will be still set. __pfn_to_mfn() is + * encapsulating get_phys_to_machine() which is called in special cases only. + * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special + * cases needing an extended handling. + */ +static inline unsigned long __pfn_to_mfn(unsigned long pfn) +{ + unsigned long mfn; + + if (pfn < xen_p2m_size) + mfn = xen_p2m_addr[pfn]; + else if (unlikely(pfn < xen_max_p2m_pfn)) + return get_phys_to_machine(pfn); + else + return IDENTITY_FRAME(pfn); + + if (unlikely(mfn == INVALID_P2M_ENTRY)) + return get_phys_to_machine(pfn); + + return mfn; +} +#else +static inline unsigned long __pfn_to_mfn(unsigned long pfn) +{ + return pfn; +} +#endif + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + unsigned long mfn; + + /* + * Some x86 code are still using pfn_to_mfn instead of + * pfn_to_mfn. This will have to be removed when we figured + * out which call. + */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + + mfn = __pfn_to_mfn(pfn); + + if (mfn != INVALID_P2M_ENTRY) + mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); + + return mfn; +} + +static inline int phys_to_machine_mapping_valid(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 1; + + return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; +} + +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) +{ + unsigned long pfn; + int ret; + + if (unlikely(mfn >= machine_to_phys_nr)) + return ~0; + + /* + * The array access can fail (e.g., device space beyond end of RAM). + * In such cases it doesn't matter what we return (we return garbage), + * but we must handle the fault without crashing! + */ + ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn); + if (ret < 0) + return ~0; + + return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + /* + * Some x86 code are still using mfn_to_pfn instead of + * gfn_to_pfn. This will have to be removed when we figure + * out which call. + */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn_no_overrides(mfn); + if (__pfn_to_mfn(pfn) != mfn) + pfn = ~0; + + /* + * pfn is ~0 if there are no entries in the m2p for mfn or the + * entry doesn't map back to the mfn. + */ + if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) + pfn = mfn; + + return pfn; +} + +static inline xmaddr_t phys_to_machine(xpaddr_t phys) +{ + unsigned offset = phys.paddr & ~PAGE_MASK; + return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); +} + +static inline xpaddr_t machine_to_phys(xmaddr_t machine) +{ + unsigned offset = machine.maddr & ~PAGE_MASK; + return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); +} + +/* Pseudo-physical <-> Guest conversion */ +static inline unsigned long pfn_to_gfn(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + else + return pfn_to_mfn(pfn); +} + +static inline unsigned long gfn_to_pfn(unsigned long gfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return gfn; + else + return mfn_to_pfn(gfn); +} + +/* Pseudo-physical <-> Bus conversion */ +#define pfn_to_bfn(pfn) pfn_to_gfn(pfn) +#define bfn_to_pfn(bfn) gfn_to_pfn(bfn) + +/* + * We detect special mappings in one of two ways: + * 1. If the MFN is an I/O page then Xen will set the m2p entry + * to be outside our maximum possible pseudophys range. + * 2. If the MFN belongs to a different domain then we will certainly + * not have MFN in our p2m table. Conversely, if the page is ours, + * then we'll have p2m(m2p(MFN))==MFN. + * If we detect a special mapping then it doesn't have a 'struct page'. + * We force !pfn_valid() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. + */ +static inline unsigned long bfn_to_local_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn(mfn); + if (__pfn_to_mfn(pfn) != mfn) + return -1; /* force !pfn_valid() */ + return pfn; +} + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) +#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) + +/* VIRT <-> GUEST conversion */ +#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v))) +#define gfn_to_virt(g) (__va(gfn_to_pfn(g) << PAGE_SHIFT)) + +static inline unsigned long pte_mfn(pte_t pte) +{ + return (pte.pte & XEN_PTE_MFN_MASK) >> PAGE_SHIFT; +} + +static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + + pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot); + + return pte; +} + +static inline pteval_t pte_val_ma(pte_t pte) +{ + return pte.pte; +} + +static inline pte_t __pte_ma(pteval_t x) +{ + return (pte_t) { .pte = x }; +} + +#define pmd_val_ma(v) ((v).pmd) +#ifdef __PAGETABLE_PUD_FOLDED +#define pud_val_ma(v) ((v).p4d.pgd.pgd) +#else +#define pud_val_ma(v) ((v).pud) +#endif +#define __pmd_ma(x) ((pmd_t) { (x) } ) + +#ifdef __PAGETABLE_P4D_FOLDED +#define p4d_val_ma(x) ((x).pgd.pgd) +#else +#define p4d_val_ma(x) ((x).p4d) +#endif + +xmaddr_t arbitrary_virt_to_machine(void *address); +unsigned long arbitrary_virt_to_mfn(void *vaddr); +void make_lowmem_page_readonly(void *vaddr); +void make_lowmem_page_readwrite(void *vaddr); + +static inline bool xen_arch_need_swiotlb(struct device *dev, + phys_addr_t phys, + dma_addr_t dev_addr) +{ + return false; +} + +#endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h new file mode 100644 index 000000000..9015b888e --- /dev/null +++ b/arch/x86/include/asm/xen/pci.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_XEN_PCI_H +#define _ASM_X86_XEN_PCI_H + +#if defined(CONFIG_PCI_XEN) +extern int __init pci_xen_init(void); +extern int __init pci_xen_hvm_init(void); +#define pci_xen 1 +#else +#define pci_xen 0 +#define pci_xen_init (0) +static inline int pci_xen_hvm_init(void) +{ + return -1; +} +#endif +#ifdef CONFIG_XEN_PV_DOM0 +int __init pci_xen_initial_domain(void); +#else +static inline int __init pci_xen_initial_domain(void) +{ + return -1; +} +#endif + +#if defined(CONFIG_PCI_MSI) +#if defined(CONFIG_PCI_XEN) +/* The drivers/pci/xen-pcifront.c sets this structure to + * its own functions. + */ +struct xen_pci_frontend_ops { + int (*enable_msi)(struct pci_dev *dev, int vectors[]); + void (*disable_msi)(struct pci_dev *dev); + int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); + void (*disable_msix)(struct pci_dev *dev); +}; + +extern struct xen_pci_frontend_ops *xen_pci_frontend; + +static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, + int vectors[]) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msi) + return xen_pci_frontend->enable_msi(dev, vectors); + return -ENOSYS; +} +static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msi) + xen_pci_frontend->disable_msi(dev); +} +static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, + int vectors[], int nvec) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msix) + return xen_pci_frontend->enable_msix(dev, vectors, nvec); + return -ENOSYS; +} +static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msix) + xen_pci_frontend->disable_msix(dev); +} +#endif /* CONFIG_PCI_XEN */ +#endif /* CONFIG_PCI_MSI */ + +#endif /* _ASM_X86_XEN_PCI_H */ diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h new file mode 100644 index 000000000..77a2d19cc --- /dev/null +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SWIOTLB_XEN_H +#define _ASM_X86_SWIOTLB_XEN_H + +#ifdef CONFIG_SWIOTLB_XEN +extern int pci_xen_swiotlb_init_late(void); +#else +static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } +#endif + +int xen_swiotlb_fixup(void *buf, unsigned long nslabs); +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, + unsigned int address_bits, + dma_addr_t *dma_handle); +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); + +#endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xen/trace_types.h b/arch/x86/include/asm/xen/trace_types.h new file mode 100644 index 000000000..2aad0abd6 --- /dev/null +++ b/arch/x86/include/asm/xen/trace_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_XEN_TRACE_TYPES_H +#define _ASM_XEN_TRACE_TYPES_H + +enum xen_mc_flush_reason { + XEN_MC_FL_NONE, /* explicit flush */ + XEN_MC_FL_BATCH, /* out of hypercall space */ + XEN_MC_FL_ARGS, /* out of argument space */ + XEN_MC_FL_CALLBACK, /* out of callback space */ +}; + +enum xen_mc_extend_args { + XEN_MC_XE_OK, + XEN_MC_XE_BAD_OP, + XEN_MC_XE_NO_SPACE +}; +typedef void (*xen_mc_callback_fn_t)(void *); + +#endif /* _ASM_XEN_TRACE_TYPES_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h new file mode 100644 index 000000000..7b0307acc --- /dev/null +++ b/arch/x86/include/asm/xor.h @@ -0,0 +1,502 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_XOR_H +#define _ASM_X86_XOR_H + +/* + * Optimized RAID-5 checksumming functions for SSE. + */ + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +/* + * Based on + * High-speed RAID5 checksumming functions utilizing SSE instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +/* + * x86-64 changes / gcc fixes from Andi Kleen. + * Copyright 2002 Andi Kleen, SuSE Labs. + * + * This hasn't been optimized for the hammer yet, but there are likely + * no advantages to be gotten from x86-64 here anyways. + */ + +#include + +#ifdef CONFIG_X86_32 +/* reduce register pressure */ +# define XOR_CONSTANT_CONSTRAINT "i" +#else +# define XOR_CONSTANT_CONSTRAINT "re" +#endif + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" +#define NOP(x) + +#define BLK64(pf, op, i) \ + pf(i) \ + op(i, 0) \ + op(i + 1, 1) \ + op(i + 2, 2) \ + op(i + 3, 3) + +static void +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(PF4, XO4, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_sse_pf64 = { + .name = "prefetch64-sse", + .do_2 = xor_sse_2_pf64, + .do_3 = xor_sse_3_pf64, + .do_4 = xor_sse_4_pf64, + .do_5 = xor_sse_5_pf64, +}; + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef NOP +#undef BLK64 +#undef BLOCK + +#undef XOR_CONSTANT_CONSTRAINT + +#ifdef CONFIG_X86_32 +# include +#else +# include +#endif + +#define XOR_SELECT_TEMPLATE(FASTEST) \ + AVX_SELECT(FASTEST) + +#endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h new file mode 100644 index 000000000..7a6b94745 --- /dev/null +++ b/arch/x86/include/asm/xor_32.h @@ -0,0 +1,573 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_X86_XOR_32_H +#define _ASM_X86_XOR_32_H + +/* + * Optimized RAID-5 checksumming functions for MMX. + */ + +/* + * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +#define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" +#define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" +#define XO1(x, y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" +#define XO2(x, y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" +#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" +#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" + +#include + +static void +xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + ST(i, 0) \ + XO1(i+1, 1) \ + ST(i+1, 1) \ + XO1(i + 2, 2) \ + ST(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + ST(i, 0) \ + XO2(i + 1, 1) \ + ST(i + 1, 1) \ + XO2(i + 2, 2) \ + ST(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + ST(i, 0) \ + XO3(i + 1, 1) \ + ST(i + 1, 1) \ + XO3(i + 2, 2) \ + ST(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + kernel_fpu_end(); +} + + +static void +xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + /* Make sure GCC forgets anything it knows about p4 or p5, + such that it won't pass to the asm volatile below a + register that is shared with any other variable. That's + because we modify p4 and p5 there, but we can't mark them + as read/write, otherwise we'd overflow the 10-asm-operands + limit of GCC < 3.1. */ + asm("" : "+r" (p4), "+r" (p5)); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + ST(i, 0) \ + XO4(i + 1, 1) \ + ST(i + 1, 1) \ + XO4(i + 2, 2) \ + ST(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " addl $128, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + /* p4 and p5 were modified, and now the variables are dead. + Clobber them just to be sure nobody does something stupid + like assuming they have some legal value. */ + asm("" : "=r" (p4), "=r" (p5)); + + kernel_fpu_end(); +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +static void +xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq 40(%1), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory" ); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor (%4), %%mm0 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " pxor 16(%4), %%mm2 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 24(%4), %%mm3 ;\n" + " movq %%mm3, 24(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq 48(%1), %%mm6 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + /* Make sure GCC forgets anything it knows about p4 or p5, + such that it won't pass to the asm volatile below a + register that is shared with any other variable. That's + because we modify p4 and p5 there, but we can't mark them + as read/write, otherwise we'd overflow the 10-asm-operands + limit of GCC < 3.1. */ + asm("" : "+r" (p4), "+r" (p5)); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor (%4), %%mm0 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor (%5), %%mm0 ;\n" + " pxor 8(%5), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 16(%4), %%mm2 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%5), %%mm2 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%4), %%mm3 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%5), %%mm3 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 32(%5), %%mm4 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " pxor 40(%5), %%mm5 ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%5), %%mm6 ;\n" + " pxor 56(%5), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " addl $64, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + /* p4 and p5 were modified, and now the variables are dead. + Clobber them just to be sure nobody does something stupid + like assuming they have some legal value. */ + asm("" : "=r" (p4), "=r" (p5)); + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_pII_mmx = { + .name = "pII_mmx", + .do_2 = xor_pII_mmx_2, + .do_3 = xor_pII_mmx_3, + .do_4 = xor_pII_mmx_4, + .do_5 = xor_pII_mmx_5, +}; + +static struct xor_block_template xor_block_p5_mmx = { + .name = "p5_mmx", + .do_2 = xor_p5_mmx_2, + .do_3 = xor_p5_mmx_3, + .do_4 = xor_p5_mmx_4, + .do_5 = xor_p5_mmx_5, +}; + +static struct xor_block_template xor_block_pIII_sse = { + .name = "pIII_sse", + .do_2 = xor_sse_2, + .do_3 = xor_sse_3, + .do_4 = xor_sse_4, + .do_5 = xor_sse_5, +}; + +/* Also try the AVX routines */ +#include + +/* Also try the generic routines. */ +#include + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + AVX_XOR_SPEED; \ + if (boot_cpu_has(X86_FEATURE_XMM)) { \ + xor_speed(&xor_block_pIII_sse); \ + xor_speed(&xor_block_sse_pf64); \ + } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ + xor_speed(&xor_block_pII_mmx); \ + xor_speed(&xor_block_p5_mmx); \ + } else { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + } \ +} while (0) + +#endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h new file mode 100644 index 000000000..0307e4ec5 --- /dev/null +++ b/arch/x86/include/asm/xor_64.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_XOR_64_H +#define _ASM_X86_XOR_64_H + +static struct xor_block_template xor_block_sse = { + .name = "generic_sse", + .do_2 = xor_sse_2, + .do_3 = xor_sse_3, + .do_4 = xor_sse_4, + .do_5 = xor_sse_5, +}; + + +/* Also try the AVX routines */ +#include + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + AVX_XOR_SPEED; \ + xor_speed(&xor_block_sse_pf64); \ + xor_speed(&xor_block_sse); \ +} while (0) + +#endif /* _ASM_X86_XOR_64_H */ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h new file mode 100644 index 000000000..7f81dd589 --- /dev/null +++ b/arch/x86/include/asm/xor_avx.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_XOR_AVX_H +#define _ASM_X86_XOR_AVX_H + +/* + * Optimized RAID-5 checksumming functions for AVX + * + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas + * + * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines + */ + +#include +#include + +#define BLOCK4(i) \ + BLOCK(32 * i, 0) \ + BLOCK(32 * (i + 1), 1) \ + BLOCK(32 * (i + 2), 2) \ + BLOCK(32 * (i + 3), 3) + +#define BLOCK16() \ + BLOCK4(0) \ + BLOCK4(4) \ + BLOCK4(8) \ + BLOCK4(12) + +static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1) +{ + unsigned long lines = bytes >> 9; + + kernel_fpu_begin(); + + while (lines--) { +#undef BLOCK +#define BLOCK(i, reg) \ +do { \ + asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p0[i / sizeof(*p0)])); \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : \ + "=m" (p0[i / sizeof(*p0)])); \ +} while (0); + + BLOCK16() + + p0 = (unsigned long *)((uintptr_t)p0 + 512); + p1 = (unsigned long *)((uintptr_t)p1 + 512); + } + + kernel_fpu_end(); +} + +static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 9; + + kernel_fpu_begin(); + + while (lines--) { +#undef BLOCK +#define BLOCK(i, reg) \ +do { \ + asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p1[i / sizeof(*p1)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p0[i / sizeof(*p0)])); \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : \ + "=m" (p0[i / sizeof(*p0)])); \ +} while (0); + + BLOCK16() + + p0 = (unsigned long *)((uintptr_t)p0 + 512); + p1 = (unsigned long *)((uintptr_t)p1 + 512); + p2 = (unsigned long *)((uintptr_t)p2 + 512); + } + + kernel_fpu_end(); +} + +static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 9; + + kernel_fpu_begin(); + + while (lines--) { +#undef BLOCK +#define BLOCK(i, reg) \ +do { \ + asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p2[i / sizeof(*p2)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p1[i / sizeof(*p1)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p0[i / sizeof(*p0)])); \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : \ + "=m" (p0[i / sizeof(*p0)])); \ +} while (0); + + BLOCK16(); + + p0 = (unsigned long *)((uintptr_t)p0 + 512); + p1 = (unsigned long *)((uintptr_t)p1 + 512); + p2 = (unsigned long *)((uintptr_t)p2 + 512); + p3 = (unsigned long *)((uintptr_t)p3 + 512); + } + + kernel_fpu_end(); +} + +static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 9; + + kernel_fpu_begin(); + + while (lines--) { +#undef BLOCK +#define BLOCK(i, reg) \ +do { \ + asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p3[i / sizeof(*p3)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p2[i / sizeof(*p2)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p1[i / sizeof(*p1)])); \ + asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ + "m" (p0[i / sizeof(*p0)])); \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : \ + "=m" (p0[i / sizeof(*p0)])); \ +} while (0); + + BLOCK16() + + p0 = (unsigned long *)((uintptr_t)p0 + 512); + p1 = (unsigned long *)((uintptr_t)p1 + 512); + p2 = (unsigned long *)((uintptr_t)p2 + 512); + p3 = (unsigned long *)((uintptr_t)p3 + 512); + p4 = (unsigned long *)((uintptr_t)p4 + 512); + } + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_avx = { + .name = "avx", + .do_2 = xor_avx_2, + .do_3 = xor_avx_3, + .do_4 = xor_avx_4, + .do_5 = xor_avx_5, +}; + +#define AVX_XOR_SPEED \ +do { \ + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ + xor_speed(&xor_block_avx); \ +} while (0) + +#define AVX_SELECT(FASTEST) \ + (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) + +#endif diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild new file mode 100644 index 000000000..39606a856 --- /dev/null +++ b/arch/x86/include/uapi/asm/Kbuild @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += unistd_32.h +generated-y += unistd_64.h +generated-y += unistd_x32.h diff --git a/arch/x86/include/uapi/asm/a.out.h b/arch/x86/include/uapi/asm/a.out.h new file mode 100644 index 000000000..094c49d8e --- /dev/null +++ b/arch/x86/include/uapi/asm/a.out.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_A_OUT_H +#define _ASM_X86_A_OUT_H + +struct exec +{ + unsigned int a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#endif /* _ASM_X86_A_OUT_H */ diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h new file mode 100644 index 000000000..769b93944 --- /dev/null +++ b/arch/x86/include/uapi/asm/amd_hsmp.h @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_X86_AMD_HSMP_H_ +#define _UAPI_ASM_X86_AMD_HSMP_H_ + +#include + +#pragma pack(4) + +#define HSMP_MAX_MSG_LEN 8 + +/* + * HSMP Messages supported + */ +enum hsmp_message_ids { + HSMP_TEST = 1, /* 01h Increments input value by 1 */ + HSMP_GET_SMU_VER, /* 02h SMU FW version */ + HSMP_GET_PROTO_VER, /* 03h HSMP interface version */ + HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */ + HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */ + HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */ + HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */ + HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */ + HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */ + HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */ + HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */ + HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */ + HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */ + HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */ + HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */ + HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */ + HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */ + HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */ + HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */ + HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */ + HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */ + HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */ + HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */ + HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */ + HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */ + HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */ + HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */ + HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */ + HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */ + HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ + HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ + HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_MSG_ID_MAX, +}; + +struct hsmp_message { + __u32 msg_id; /* Message ID */ + __u16 num_args; /* Number of input argument words in message */ + __u16 response_sz; /* Number of expected output/response words */ + __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */ + __u16 sock_ind; /* socket number */ +}; + +enum hsmp_msg_type { + HSMP_RSVD = -1, + HSMP_SET = 0, + HSMP_GET = 1, +}; + +struct hsmp_msg_desc { + int num_args; + int response_sz; + enum hsmp_msg_type type; +}; + +/* + * User may use these comments as reference, please find the + * supported list of messages and message definition in the + * HSMP chapter of respective family/model PPR. + * + * Not supported messages would return -ENOMSG. + */ +static const struct hsmp_msg_desc hsmp_msg_desc_table[] = { + /* RESERVED */ + {0, 0, HSMP_RSVD}, + + /* + * HSMP_TEST, num_args = 1, response_sz = 1 + * input: args[0] = xx + * output: args[0] = xx + 1 + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1 + * output: args[0] = smu fw ver + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1 + * output: args[0] = proto version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1 + * output: args[0] = socket power in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = power limit value in mWatts + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = socket power limit value in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1 + * output: args[0] = maximuam socket power limit in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0 + * input: args[0] = boost limit value in MHz + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id + * output: args[0] = boost limit value in MHz + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1 + * output: args[0] = proc hot status + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0 + * input: args[0] = df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */ + {0, 0, HSMP_SET}, + + /* + * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2 + * output: args[0] = fclk in MHz, args[1] = mclk in MHz + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = core clock in MHz + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1 + * output: args[0] = average c0 residency + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0 + * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1 + * input: args[0] = nbioid[23:16] + * output: args[0] = max dpm level[15:8] + min dpm level[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1 + * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] + + * bw in percentage[7:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1 + * output: args[0] = temperature in degree celsius. [15:8] integer part + + * [7:5] fractional part + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = refresh rate[3] + temperature range[2:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = temperature in degree celcius[31:21] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = frequency in MHz[31:16] + frequency source[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id [31:0] + * output: args[0] = frequency in MHz[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1 + * output: args[0] = power in mW[31:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1 + * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = io bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = xgmi bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1 + * input: args[0] = link rate control value + * output: args[0] = previous link rate control value + */ + {1, 1, HSMP_SET}, + + /* + * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0 + * input: args[0] = power efficiency mode[2:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0 + * input: args[0] = min df pstate[15:8] + max df pstate[7:0] + */ + {1, 0, HSMP_SET}, +}; + +/* Reset to default packing */ +#pragma pack() + +/* Define unique ioctl command for hsmp msgs using generic _IOWR */ +#define HSMP_BASE_IOCTL_NR 0xF8 +#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message) + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h new file mode 100644 index 000000000..6beb55bbe --- /dev/null +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_AUXVEC_H +#define _ASM_X86_AUXVEC_H +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#ifdef __i386__ +#define AT_SYSINFO 32 +#endif +#define AT_SYSINFO_EHDR 33 + +/* entries in ARCH_DLINFO: */ +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) +# define AT_VECTOR_SIZE_ARCH 3 +#else /* else it's non-compat x86-64 */ +# define AT_VECTOR_SIZE_ARCH 2 +#endif + +#endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000..5d72c8458 --- /dev/null +++ b/arch/x86/include/uapi/asm/bitsperlong.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_X86_BITSPERLONG_H +#define __ASM_X86_BITSPERLONG_H + +#if defined(__x86_64__) && !defined(__ILP32__) +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_X86_BITSPERLONG_H */ + diff --git a/arch/x86/include/uapi/asm/boot.h b/arch/x86/include/uapi/asm/boot.h new file mode 100644 index 000000000..88ffc5aee --- /dev/null +++ b/arch/x86/include/uapi/asm/boot.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_BOOT_H +#define _UAPI_ASM_X86_BOOT_H + +/* Internal svga startup constants */ +#define NORMAL_VGA 0xffff /* 80x25 mode */ +#define EXTENDED_VGA 0xfffe /* 80x50 mode */ +#define ASK_VGA 0xfffd /* ask for it at bootup */ + + +#endif /* _UAPI_ASM_X86_BOOT_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h new file mode 100644 index 000000000..01d19fc22 --- /dev/null +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_BOOTPARAM_H +#define _ASM_X86_BOOTPARAM_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_ENUM_MAX SETUP_RNG_SEED + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +/* ram_size flags */ +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + +/* loadflags */ +#define LOADED_HIGH (1<<0) +#define KASLR_FLAG (1<<1) +#define QUIET_FLAG (1<<5) +#define KEEP_SEGMENTS (1<<6) +#define CAN_USE_HEAP (1<<7) + +/* xloadflags */ +#define XLF_KERNEL_64 (1<<0) +#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) +#define XLF_EFI_HANDOVER_32 (1<<2) +#define XLF_EFI_HANDOVER_64 (1<<3) +#define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include +#include