summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/doublefault_32.c
blob: 6eaf9a6bc02fff3a52ca5b2403dc851721cd286c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/init_task.h>
#include <linux/fs.h>

#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/doublefault.h>

#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)

#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)

static void set_df_gdt_entry(unsigned int cpu);

/*
 * Called by double_fault with CR0.TS and EFLAGS.NT cleared.  The CPU thinks
 * we're running the doublefault task.  Cannot return.
 */
asmlinkage noinstr void __noreturn doublefault_shim(void)
{
	unsigned long cr2;
	struct pt_regs regs;

	BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);

	cr2 = native_read_cr2();

	/* Reset back to the normal kernel task. */
	force_reload_TR();
	set_df_gdt_entry(smp_processor_id());

	trace_hardirqs_off();

	/*
	 * Fill in pt_regs.  A downside of doing this in C is that the unwinder
	 * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
	 * won't successfully unwind to the source of the double fault.
	 * The main dump from exc_double_fault() is fine, though, since it
	 * uses these regs directly.
	 *
	 * If anyone ever cares, this could be moved to asm.
	 */
	regs.ss		= TSS(ss);
	regs.__ssh	= 0;
	regs.sp		= TSS(sp);
	regs.flags	= TSS(flags);
	regs.cs		= TSS(cs);
	/* We won't go through the entry asm, so we can leave __csh as 0. */
	regs.__csh	= 0;
	regs.ip		= TSS(ip);
	regs.orig_ax	= 0;
	regs.gs		= TSS(gs);
	regs.__gsh	= 0;
	regs.fs		= TSS(fs);
	regs.__fsh	= 0;
	regs.es		= TSS(es);
	regs.__esh	= 0;
	regs.ds		= TSS(ds);
	regs.__dsh	= 0;
	regs.ax		= TSS(ax);
	regs.bp		= TSS(bp);
	regs.di		= TSS(di);
	regs.si		= TSS(si);
	regs.dx		= TSS(dx);
	regs.cx		= TSS(cx);
	regs.bx		= TSS(bx);

	exc_double_fault(&regs, 0, cr2);

	/*
	 * x86_32 does not save the original CR3 anywhere on a task switch.
	 * This means that, even if we wanted to return, we would need to find
	 * some way to reconstruct CR3.  We could make a credible guess based
	 * on cpu_tlbstate, but that would be racy and would not account for
	 * PTI.
	 */
	panic("cannot return from double fault\n");
}

DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
	.tss = {
                /*
                 * No sp0 or ss0 -- we never run CPL != 0 with this TSS
                 * active.  sp is filled in later.
                 */
		.ldt		= 0,
	.io_bitmap_base	= IO_BITMAP_OFFSET_INVALID,

		.ip		= (unsigned long) asm_exc_double_fault,
		.flags		= X86_EFLAGS_FIXED,
		.es		= __USER_DS,
		.cs		= __KERNEL_CS,
		.ss		= __KERNEL_DS,
		.ds		= __USER_DS,
		.fs		= __KERNEL_PERCPU,
		.gs		= 0,

		.__cr3		= __pa_nodebug(swapper_pg_dir),
	},
};

static void set_df_gdt_entry(unsigned int cpu)
{
	/* Set up doublefault TSS pointer in the GDT */
	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
		       &get_cpu_entry_area(cpu)->doublefault_stack.tss);

}

void doublefault_init_cpu_tss(void)
{
	unsigned int cpu = smp_processor_id();
	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);

	/*
	 * The linker isn't smart enough to initialize percpu variables that
	 * point to other places in percpu space.
	 */
        this_cpu_write(doublefault_stack.tss.sp,
                       (unsigned long)&cea->doublefault_stack.stack +
                       sizeof(doublefault_stack.stack));

	set_df_gdt_entry(cpu);
}