summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/sev-es-shared.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/x86/kernel/sev-es-shared.c567
1 files changed, 567 insertions, 0 deletions
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
new file mode 100644
index 000000000..e9f8a2bc5
--- /dev/null
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -0,0 +1,567 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ *
+ * This file is not compiled stand-alone. It contains code shared
+ * between the pre-decompression boot code and the running Linux kernel
+ * and is included directly into both code-bases.
+ */
+
+#ifndef __BOOT_COMPRESSED
+#define error(v) pr_err(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#endif
+
+static bool __init sev_es_check_cpu_features(void)
+{
+ if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+ error("RDRAND instruction not supported - no trusted source of randomness available\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void sev_es_terminate(unsigned int reason)
+{
+ u64 val = GHCB_SEV_TERMINATE;
+
+ /*
+ * Tell the hypervisor what went wrong - only reason-set 0 is
+ * currently supported.
+ */
+ val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+
+ /* Request Guest Termination from Hypvervisor */
+ sev_es_wr_ghcb_msr(val);
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+static bool sev_es_negotiate_protocol(void)
+{
+ u64 val;
+
+ /* Do the GHCB protocol version negotiation */
+ sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+
+ if (GHCB_INFO(val) != GHCB_SEV_INFO)
+ return false;
+
+ if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+ GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+ return false;
+
+ return true;
+}
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+ ghcb->save.sw_exit_code = 0;
+ memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+ /* Exceptions don't require to decode the instruction */
+ return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+ exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+ struct pt_regs *regs,
+ unsigned long exit_code)
+{
+ enum es_result ret = ES_OK;
+
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->regs = regs;
+
+ if (vc_decoding_needed(exit_code))
+ ret = vc_decode_insn(ctxt);
+
+ return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+ ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
+{
+ enum es_result ret;
+
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
+ u64 info = ghcb->save.sw_exit_info_2;
+ unsigned long v;
+
+ info = ghcb->save.sw_exit_info_2;
+ v = info & SVM_EVTINJ_VEC_MASK;
+
+ /* Check if exception information from hypervisor is sane. */
+ if ((info & SVM_EVTINJ_VALID) &&
+ ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+ ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+ ctxt->fi.vector = v;
+ if (info & SVM_EVTINJ_VALID_ERR)
+ ctxt->fi.error_code = info >> 32;
+ ret = ES_EXCEPTION;
+ } else {
+ ret = ES_VMM_ERROR;
+ }
+ } else if (ghcb->save.sw_exit_info_1 & 0xffffffff) {
+ ret = ES_VMM_ERROR;
+ } else {
+ ret = ES_OK;
+ }
+
+ return ret;
+}
+
+/*
+ * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
+ */
+void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+{
+ unsigned int fn = lower_bits(regs->ax, 32);
+ unsigned long val;
+
+ /* Only CPUID is supported via MSR protocol */
+ if (exit_code != SVM_EXIT_CPUID)
+ goto fail;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->ax = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->bx = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->cx = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->dx = val >> 32;
+
+ /*
+ * This is a VC handler and the #VC is only raised when SEV-ES is
+ * active, which means SEV must be active too. Do sanity checks on the
+ * CPUID results to make sure the hypervisor does not trick the kernel
+ * into the no-sev path. This could map sensitive data unencrypted and
+ * make it accessible to the hypervisor.
+ *
+ * In particular, check for:
+ * - Availability of CPUID leaf 0x8000001f
+ * - SEV CPUID bit.
+ *
+ * The hypervisor might still report the wrong C-bit position, but this
+ * can't be checked here.
+ */
+
+ if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+ /* SEV leaf check */
+ goto fail;
+ else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
+ /* SEV bit */
+ goto fail;
+
+ /* Skip over the CPUID two-byte opcode */
+ regs->ip += 2;
+
+ return;
+
+fail:
+ sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE);
+ VMGEXIT();
+
+ /* Shouldn't get here - if we do halt the machine */
+ while (true)
+ asm volatile("hlt\n");
+}
+
+static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
+ unsigned long address,
+ bool write)
+{
+ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_USER;
+ ctxt->fi.cr2 = address;
+ if (write)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return ES_EXCEPTION;
+ }
+
+ return ES_OK;
+}
+
+static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+ void *src, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, b = backwards ? -1 : 1;
+ unsigned long address = (unsigned long)src;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, false);
+ if (ret != ES_OK)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *s = src + (i * data_size * b);
+ char *d = buf + (i * data_size);
+
+ ret = vc_read_mem(ctxt, s, d, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
+ void *dst, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, s = backwards ? -1 : 1;
+ unsigned long address = (unsigned long)dst;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, true);
+ if (ret != ES_OK)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ void *d = dst + (i * data_size * s);
+ char *b = buf + (i * data_size);
+
+ ret = vc_write_mem(ctxt, d, b, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+#define IOIO_TYPE_STR BIT(2)
+#define IOIO_TYPE_IN 1
+#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR)
+#define IOIO_TYPE_OUT 0
+#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
+
+#define IOIO_REP BIT(3)
+
+#define IOIO_ADDR_64 BIT(9)
+#define IOIO_ADDR_32 BIT(8)
+#define IOIO_ADDR_16 BIT(7)
+
+#define IOIO_DATA_32 BIT(6)
+#define IOIO_DATA_16 BIT(5)
+#define IOIO_DATA_8 BIT(4)
+
+#define IOIO_SEG_ES (0 << 10)
+#define IOIO_SEG_DS (3 << 10)
+
+static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+{
+ struct insn *insn = &ctxt->insn;
+ size_t size;
+ u64 port;
+
+ *exitinfo = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ /* INS opcodes */
+ case 0x6c:
+ case 0x6d:
+ *exitinfo |= IOIO_TYPE_INS;
+ *exitinfo |= IOIO_SEG_ES;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUTS opcodes */
+ case 0x6e:
+ case 0x6f:
+ *exitinfo |= IOIO_TYPE_OUTS;
+ *exitinfo |= IOIO_SEG_DS;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* IN immediate opcodes */
+ case 0xe4:
+ case 0xe5:
+ *exitinfo |= IOIO_TYPE_IN;
+ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* OUT immediate opcodes */
+ case 0xe6:
+ case 0xe7:
+ *exitinfo |= IOIO_TYPE_OUT;
+ port = (u8)insn->immediate.value & 0xffff;
+ break;
+
+ /* IN register opcodes */
+ case 0xec:
+ case 0xed:
+ *exitinfo |= IOIO_TYPE_IN;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ /* OUT register opcodes */
+ case 0xee:
+ case 0xef:
+ *exitinfo |= IOIO_TYPE_OUT;
+ port = ctxt->regs->dx & 0xffff;
+ break;
+
+ default:
+ return ES_DECODE_FAILED;
+ }
+
+ *exitinfo |= port << 16;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0x6c:
+ case 0x6e:
+ case 0xe4:
+ case 0xe6:
+ case 0xec:
+ case 0xee:
+ /* Single byte opcodes */
+ *exitinfo |= IOIO_DATA_8;
+ size = 1;
+ break;
+ default:
+ /* Length determined by instruction parsing */
+ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+ : IOIO_DATA_32;
+ size = (insn->opnd_bytes == 2) ? 2 : 4;
+ }
+
+ switch (insn->addr_bytes) {
+ case 2:
+ *exitinfo |= IOIO_ADDR_16;
+ break;
+ case 4:
+ *exitinfo |= IOIO_ADDR_32;
+ break;
+ case 8:
+ *exitinfo |= IOIO_ADDR_64;
+ break;
+ }
+
+ if (insn_has_rep_prefix(insn))
+ *exitinfo |= IOIO_REP;
+
+ return vc_ioio_check(ctxt, (u16)port, size);
+}
+
+static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u64 exit_info_1, exit_info_2;
+ enum es_result ret;
+
+ ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_STR) {
+
+ /* (REP) INS/OUTS */
+
+ bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
+ unsigned int io_bytes, exit_bytes;
+ unsigned int ghcb_count, op_count;
+ unsigned long es_base;
+ u64 sw_scratch;
+
+ /*
+ * For the string variants with rep prefix the amount of in/out
+ * operations per #VC exception is limited so that the kernel
+ * has a chance to take interrupts and re-schedule while the
+ * instruction is emulated.
+ */
+ io_bytes = (exit_info_1 >> 4) & 0x7;
+ ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
+
+ op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
+ exit_info_2 = min(op_count, ghcb_count);
+ exit_bytes = exit_info_2 * io_bytes;
+
+ es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+ /* Read bytes of OUTS into the shared buffer */
+ if (!(exit_info_1 & IOIO_TYPE_IN)) {
+ ret = vc_insn_string_read(ctxt,
+ (void *)(es_base + regs->si),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Issue an VMGEXIT to the HV to consume the bytes from the
+ * shared buffer or to have it write them into the shared buffer
+ * depending on the instruction: OUTS or INS.
+ */
+ sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
+ ghcb_set_sw_scratch(ghcb, sw_scratch);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
+ exit_info_1, exit_info_2);
+ if (ret != ES_OK)
+ return ret;
+
+ /* Read bytes from shared buffer into the guest's destination. */
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ ret = vc_insn_string_write(ctxt,
+ (void *)(es_base + regs->di),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+
+ if (df)
+ regs->di -= exit_bytes;
+ else
+ regs->di += exit_bytes;
+ } else {
+ if (df)
+ regs->si -= exit_bytes;
+ else
+ regs->si += exit_bytes;
+ }
+
+ if (exit_info_1 & IOIO_REP)
+ regs->cx -= exit_info_2;
+
+ ret = regs->cx ? ES_RETRY : ES_OK;
+
+ } else {
+
+ /* IN/OUT into/from rAX */
+
+ int bits = (exit_info_1 & 0x70) >> 1;
+ u64 rax = 0;
+
+ if (!(exit_info_1 & IOIO_TYPE_IN))
+ rax = lower_bits(regs->ax, bits);
+
+ ghcb_set_rax(ghcb, rax);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ if (!ghcb_rax_is_valid(ghcb))
+ return ES_VMM_ERROR;
+ regs->ax = lower_bits(ghcb->save.rax, bits);
+ }
+ }
+
+ return ret;
+}
+
+static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u32 cr4 = native_read_cr4();
+ enum es_result ret;
+
+ ghcb_set_rax(ghcb, regs->ax);
+ ghcb_set_rcx(ghcb, regs->cx);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #GP - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ regs->ax = ghcb->save.rax;
+ regs->bx = ghcb->save.rbx;
+ regs->cx = ghcb->save.rcx;
+ regs->dx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
+ enum es_result ret;
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
+ (!rdtscp || ghcb_rcx_is_valid(ghcb))))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+ ctxt->regs->dx = ghcb->save.rdx;
+ if (rdtscp)
+ ctxt->regs->cx = ghcb->save.rcx;
+
+ return ES_OK;
+}