// SPDX-License-Identifier: GPL-2.0 /* * AMD Encrypted Register State Support * * Author: Joerg Roedel * * This file is not compiled stand-alone. It contains code shared * between the pre-decompression boot code and the running Linux kernel * and is included directly into both code-bases. */ #ifndef __BOOT_COMPRESSED #define error(v) pr_err(v) #define has_cpuflag(f) boot_cpu_has(f) #endif static bool __init sev_es_check_cpu_features(void) { if (!has_cpuflag(X86_FEATURE_RDRAND)) { error("RDRAND instruction not supported - no trusted source of randomness available\n"); return false; } return true; } static void sev_es_terminate(unsigned int reason) { u64 val = GHCB_SEV_TERMINATE; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ val |= GHCB_SEV_TERMINATE_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); VMGEXIT(); while (true) asm volatile("hlt\n" : : : "memory"); } static bool sev_es_negotiate_protocol(void) { u64 val; /* Do the GHCB protocol version negotiation */ sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); if (GHCB_INFO(val) != GHCB_SEV_INFO) return false; if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; } static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { ghcb->save.sw_exit_code = 0; memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } static bool vc_decoding_needed(unsigned long exit_code) { /* Exceptions don't require to decode the instruction */ return !(exit_code >= SVM_EXIT_EXCP_BASE && exit_code <= SVM_EXIT_LAST_EXCP); } static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt, struct pt_regs *regs, unsigned long exit_code) { enum es_result ret = ES_OK; memset(ctxt, 0, sizeof(*ctxt)); ctxt->regs = regs; if (vc_decoding_needed(exit_code)) ret = vc_decode_insn(ctxt); return ret; } static void vc_finish_insn(struct es_em_ctxt *ctxt) { ctxt->regs->ip += ctxt->insn.length; } static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, struct es_em_ctxt *ctxt, u64 exit_code, u64 exit_info_1, u64 exit_info_2) { enum es_result ret; /* Fill in protocol and format specifiers */ ghcb->protocol_version = GHCB_PROTOCOL_MAX; ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; ghcb_set_sw_exit_code(ghcb, exit_code); ghcb_set_sw_exit_info_1(ghcb, exit_info_1); ghcb_set_sw_exit_info_2(ghcb, exit_info_2); sev_es_wr_ghcb_msr(__pa(ghcb)); VMGEXIT(); if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) { u64 info = ghcb->save.sw_exit_info_2; unsigned long v; info = ghcb->save.sw_exit_info_2; v = info & SVM_EVTINJ_VEC_MASK; /* Check if exception information from hypervisor is sane. */ if ((info & SVM_EVTINJ_VALID) && ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { ctxt->fi.vector = v; if (info & SVM_EVTINJ_VALID_ERR) ctxt->fi.error_code = info >> 32; ret = ES_EXCEPTION; } else { ret = ES_VMM_ERROR; } } else if (ghcb->save.sw_exit_info_1 & 0xffffffff) { ret = ES_VMM_ERROR; } else { ret = ES_OK; } return ret; } /* * Boot VC Handler - This is the first VC handler during boot, there is no GHCB * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int fn = lower_bits(regs->ax, 32); unsigned long val; /* Only CPUID is supported via MSR protocol */ if (exit_code != SVM_EXIT_CPUID) goto fail; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) goto fail; regs->dx = val >> 32; /* * This is a VC handler and the #VC is only raised when SEV-ES is * active, which means SEV must be active too. Do sanity checks on the * CPUID results to make sure the hypervisor does not trick the kernel * into the no-sev path. This could map sensitive data unencrypted and * make it accessible to the hypervisor. * * In particular, check for: * - Availability of CPUID leaf 0x8000001f * - SEV CPUID bit. * * The hypervisor might still report the wrong C-bit position, but this * can't be checked here. */ if (fn == 0x80000000 && (regs->ax < 0x8000001f)) /* SEV leaf check */ goto fail; else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) /* SEV bit */ goto fail; /* Skip over the CPUID two-byte opcode */ regs->ip += 2; return; fail: sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE); VMGEXIT(); /* Shouldn't get here - if we do halt the machine */ while (true) asm volatile("hlt\n"); } static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt, unsigned long address, bool write) { if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) { ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_USER; ctxt->fi.cr2 = address; if (write) ctxt->fi.error_code |= X86_PF_WRITE; return ES_EXCEPTION; } return ES_OK; } static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, void *src, char *buf, unsigned int data_size, unsigned int count, bool backwards) { int i, b = backwards ? -1 : 1; unsigned long address = (unsigned long)src; enum es_result ret; ret = vc_insn_string_check(ctxt, address, false); if (ret != ES_OK) return ret; for (i = 0; i < count; i++) { void *s = src + (i * data_size * b); char *d = buf + (i * data_size); ret = vc_read_mem(ctxt, s, d, data_size); if (ret != ES_OK) break; } return ret; } static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, void *dst, char *buf, unsigned int data_size, unsigned int count, bool backwards) { int i, s = backwards ? -1 : 1; unsigned long address = (unsigned long)dst; enum es_result ret; ret = vc_insn_string_check(ctxt, address, true); if (ret != ES_OK) return ret; for (i = 0; i < count; i++) { void *d = dst + (i * data_size * s); char *b = buf + (i * data_size); ret = vc_write_mem(ctxt, d, b, data_size); if (ret != ES_OK) break; } return ret; } #define IOIO_TYPE_STR BIT(2) #define IOIO_TYPE_IN 1 #define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR) #define IOIO_TYPE_OUT 0 #define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR) #define IOIO_REP BIT(3) #define IOIO_ADDR_64 BIT(9) #define IOIO_ADDR_32 BIT(8) #define IOIO_ADDR_16 BIT(7) #define IOIO_DATA_32 BIT(6) #define IOIO_DATA_16 BIT(5) #define IOIO_DATA_8 BIT(4) #define IOIO_SEG_ES (0 << 10) #define IOIO_SEG_DS (3 << 10) static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; size_t size; u64 port; *exitinfo = 0; switch (insn->opcode.bytes[0]) { /* INS opcodes */ case 0x6c: case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ case 0x6e: case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } *exitinfo |= port << 16; switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: case 0xe4: case 0xe6: case 0xec: case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; size = (insn->opnd_bytes == 2) ? 2 : 4; } switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; break; case 4: *exitinfo |= IOIO_ADDR_32; break; case 8: *exitinfo |= IOIO_ADDR_64; break; } if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct pt_regs *regs = ctxt->regs; u64 exit_info_1, exit_info_2; enum es_result ret; ret = vc_ioio_exitinfo(ctxt, &exit_info_1); if (ret != ES_OK) return ret; if (exit_info_1 & IOIO_TYPE_STR) { /* (REP) INS/OUTS */ bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF); unsigned int io_bytes, exit_bytes; unsigned int ghcb_count, op_count; unsigned long es_base; u64 sw_scratch; /* * For the string variants with rep prefix the amount of in/out * operations per #VC exception is limited so that the kernel * has a chance to take interrupts and re-schedule while the * instruction is emulated. */ io_bytes = (exit_info_1 >> 4) & 0x7; ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes; op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1; exit_info_2 = min(op_count, ghcb_count); exit_bytes = exit_info_2 * io_bytes; es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); /* Read bytes of OUTS into the shared buffer */ if (!(exit_info_1 & IOIO_TYPE_IN)) { ret = vc_insn_string_read(ctxt, (void *)(es_base + regs->si), ghcb->shared_buffer, io_bytes, exit_info_2, df); if (ret) return ret; } /* * Issue an VMGEXIT to the HV to consume the bytes from the * shared buffer or to have it write them into the shared buffer * depending on the instruction: OUTS or INS. */ sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); ghcb_set_sw_scratch(ghcb, sw_scratch); ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, exit_info_2); if (ret != ES_OK) return ret; /* Read bytes from shared buffer into the guest's destination. */ if (exit_info_1 & IOIO_TYPE_IN) { ret = vc_insn_string_write(ctxt, (void *)(es_base + regs->di), ghcb->shared_buffer, io_bytes, exit_info_2, df); if (ret) return ret; if (df) regs->di -= exit_bytes; else regs->di += exit_bytes; } else { if (df) regs->si -= exit_bytes; else regs->si += exit_bytes; } if (exit_info_1 & IOIO_REP) regs->cx -= exit_info_2; ret = regs->cx ? ES_RETRY : ES_OK; } else { /* IN/OUT into/from rAX */ int bits = (exit_info_1 & 0x70) >> 1; u64 rax = 0; if (!(exit_info_1 & IOIO_TYPE_IN)) rax = lower_bits(regs->ax, bits); ghcb_set_rax(ghcb, rax); ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0); if (ret != ES_OK) return ret; if (exit_info_1 & IOIO_TYPE_IN) { if (!ghcb_rax_is_valid(ghcb)) return ES_VMM_ERROR; regs->ax = lower_bits(ghcb->save.rax, bits); } } return ret; } static enum es_result vc_handle_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct pt_regs *regs = ctxt->regs; u32 cr4 = native_read_cr4(); enum es_result ret; ghcb_set_rax(ghcb, regs->ax); ghcb_set_rcx(ghcb, regs->cx); if (cr4 & X86_CR4_OSXSAVE) /* Safe to read xcr0 */ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); else /* xgetbv will cause #GP - use reset value for xcr0 */ ghcb_set_xcr0(ghcb, 1); ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); if (ret != ES_OK) return ret; if (!(ghcb_rax_is_valid(ghcb) && ghcb_rbx_is_valid(ghcb) && ghcb_rcx_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) return ES_VMM_ERROR; regs->ax = ghcb->save.rax; regs->bx = ghcb->save.rbx; regs->cx = ghcb->save.rcx; regs->dx = ghcb->save.rdx; return ES_OK; } static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, struct es_em_ctxt *ctxt, unsigned long exit_code) { bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); enum es_result ret; ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); if (ret != ES_OK) return ret; if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) && (!rdtscp || ghcb_rcx_is_valid(ghcb)))) return ES_VMM_ERROR; ctxt->regs->ax = ghcb->save.rax; ctxt->regs->dx = ghcb->save.rdx; if (rdtscp) ctxt->regs->cx = ghcb->save.rcx; return ES_OK; }