diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:35:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:31 +0000 |
commit | 85c675d0d09a45a135bddd15d7b385f8758c32fb (patch) | |
tree | 76267dbc9b9a130337be3640948fe397b04ac629 /arch/powerpc/lib | |
parent | Adding upstream version 6.6.15. (diff) | |
download | linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip |
Adding upstream version 6.7.7.upstream/6.7.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r-- | arch/powerpc/lib/code-patching.c | 146 | ||||
-rw-r--r-- | arch/powerpc/lib/qspinlock.c | 122 | ||||
-rw-r--r-- | arch/powerpc/lib/sstep.c | 10 |
3 files changed, 201 insertions, 77 deletions
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index b00112d7ad..c6ab46156c 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -38,6 +38,7 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr return 0; failed: + mb(); /* sync */ return -EPERM; } @@ -204,9 +205,6 @@ void __init poking_init(void) { int ret; - if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - return; - if (mm_patch_enabled()) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke_mm:online", @@ -309,10 +307,6 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) err = __patch_instruction(addr, instr, patch_addr); - /* hwsync performed by __patch_instruction (sync) if successful */ - if (err) - mb(); /* sync */ - /* context synchronisation performed by __patch_instruction (isync or exception) */ stop_using_temp_mm(patching_mm, orig_mm); @@ -378,6 +372,144 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) } NOKPROBE_SYMBOL(patch_instruction); +static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long start = (unsigned long)patch_addr; + + /* Repeat instruction */ + if (repeat_instr) { + ppc_inst_t instr = ppc_inst_read(code); + + if (ppc_inst_prefixed(instr)) { + u64 val = ppc_inst_as_ulong(instr); + + memset64((u64 *)patch_addr, val, len / 8); + } else { + u32 val = ppc_inst_val(instr); + + memset32(patch_addr, val, len / 4); + } + } else { + memcpy(patch_addr, code, len); + } + + smp_wmb(); /* smp write barrier */ + flush_icache_range(start, start + len); + return 0; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + struct mm_struct *patching_mm, *orig_mm; + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + spinlock_t *ptl; + u32 *patch_addr; + pte_t *pte; + int err; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync" ::: "memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + u32 *patch_addr; + pte_t *pte; + int err; + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync" ::: "memory"); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +/* + * Patch 'addr' with 'len' bytes of instructions from 'code'. + * + * If repeat_instr is true, the same instruction is filled for + * 'len' bytes. + */ +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + while (len > 0) { + unsigned long flags; + size_t plen; + int err; + + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); + else + err = __do_patch_instructions(addr, code, plen, repeat_instr); + local_irq_restore(flags); + if (err) + return err; + + len -= plen; + addr = (u32 *)((unsigned long)addr + plen); + if (!repeat_instr) + code = (u32 *)((unsigned long)code + plen); + } + + return 0; +} +NOKPROBE_SYMBOL(patch_instructions); + int patch_branch(u32 *addr, unsigned long target, int flags) { ppc_inst_t instr; diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 6dd2f46bd3..5de4dd549f 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -16,7 +16,8 @@ struct qnode { struct qnode *next; struct qspinlock *lock; int cpu; - int yield_cpu; + u8 sleepy; /* 1 if the previous vCPU was preempted or + * if the previous node was sleepy */ u8 locked; /* 1 if lock acquired */ }; @@ -43,7 +44,7 @@ static bool pv_sleepy_lock_sticky __read_mostly = false; static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; static int pv_sleepy_lock_factor __read_mostly = 256; static bool pv_yield_prev __read_mostly = true; -static bool pv_yield_propagate_owner __read_mostly = true; +static bool pv_yield_sleepy_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -247,22 +248,18 @@ static __always_inline void seen_sleepy_lock(void) this_cpu_write(sleepy_lock_seen_clock, sched_clock()); } -static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) +static __always_inline void seen_sleepy_node(void) { if (pv_sleepy_lock) { if (pv_sleepy_lock_interval_ns) this_cpu_write(sleepy_lock_seen_clock, sched_clock()); - if (val & _Q_LOCKED_VAL) { - if (!(val & _Q_SLEEPY_VAL)) - try_set_sleepy(lock, val); - } + /* Don't set sleepy because we likely have a stale val */ } } -static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu) { - int cpu = decode_tail_cpu(val); - struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu); int idx; /* @@ -353,77 +350,66 @@ static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u return __yield_to_locked_owner(lock, val, paravirt, mustq); } -static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) +static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt) { struct qnode *next; int owner; if (!paravirt) return; - if (!pv_yield_propagate_owner) - return; - - owner = get_owner_cpu(val); - if (*set_yield_cpu == owner) + if (!pv_yield_sleepy_owner) return; next = READ_ONCE(node->next); if (!next) return; - if (vcpu_is_preempted(owner)) { - next->yield_cpu = owner; - *set_yield_cpu = owner; - } else if (*set_yield_cpu != -1) { - next->yield_cpu = owner; - *set_yield_cpu = owner; - } + if (next->sleepy) + return; + + owner = get_owner_cpu(val); + if (vcpu_is_preempted(owner)) + next->sleepy = 1; } /* Called inside spin_begin() */ -static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) { - int prev_cpu = decode_tail_cpu(val); u32 yield_count; - int yield_cpu; bool preempted = false; if (!paravirt) goto relax; - if (!pv_yield_propagate_owner) - goto yield_prev; - - yield_cpu = READ_ONCE(node->yield_cpu); - if (yield_cpu == -1) { - /* Propagate back the -1 CPU */ - if (node->next && node->next->yield_cpu != -1) - node->next->yield_cpu = yield_cpu; + if (!pv_yield_sleepy_owner) goto yield_prev; - } - - yield_count = yield_count_of(yield_cpu); - if ((yield_count & 1) == 0) - goto yield_prev; /* owner vcpu is running */ - - if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu) - goto yield_prev; /* re-sample lock owner */ - spin_end(); - - preempted = true; - seen_sleepy_node(lock, val); + /* + * If the previous waiter was preempted it might not be able to + * propagate sleepy to us, so check the lock in that case too. + */ + if (node->sleepy || vcpu_is_preempted(prev_cpu)) { + u32 val = READ_ONCE(lock->val); - smp_rmb(); + if (val & _Q_LOCKED_VAL) { + if (node->next && !node->next->sleepy) { + /* + * Propagate sleepy to next waiter. Only if + * owner is preempted, which allows the queue + * to become "non-sleepy" if vCPU preemption + * ceases to occur, even if the lock remains + * highly contended. + */ + if (vcpu_is_preempted(get_owner_cpu(val))) + node->next->sleepy = 1; + } - if (yield_cpu == node->yield_cpu) { - if (node->next && node->next->yield_cpu != yield_cpu) - node->next->yield_cpu = yield_cpu; - yield_to_preempted(yield_cpu, yield_count); - spin_begin(); - return preempted; + preempted = yield_to_locked_owner(lock, val, paravirt); + if (preempted) + return preempted; + } + node->sleepy = false; } - spin_begin(); yield_prev: if (!pv_yield_prev) @@ -436,7 +422,7 @@ yield_prev: spin_end(); preempted = true; - seen_sleepy_node(lock, val); + seen_sleepy_node(); smp_rmb(); /* See __yield_to_locked_owner comment */ @@ -546,7 +532,6 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b bool sleepy = false; bool mustq = false; int idx; - int set_yield_cpu = -1; int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -570,7 +555,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node->next = NULL; node->lock = lock; node->cpu = smp_processor_id(); - node->yield_cpu = -1; + node->sleepy = 0; node->locked = 0; tail = encode_tail_cpu(node->cpu); @@ -587,7 +572,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b * head of the waitqueue. */ if (old & _Q_TAIL_CPU_MASK) { - struct qnode *prev = get_tail_qnode(lock, old); + int prev_cpu = decode_tail_cpu(old); + struct qnode *prev = get_tail_qnode(lock, prev_cpu); /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); @@ -597,16 +583,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b while (!READ_ONCE(node->locked)) { spec_barrier(); - if (yield_to_prev(lock, node, old, paravirt)) + if (yield_to_prev(lock, node, prev_cpu, paravirt)) seen_preempted = true; } spec_barrier(); spin_end(); - /* Clear out stale propagated yield_cpu */ - if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) - node->yield_cpu = -1; - smp_rmb(); /* acquire barrier for the mcs lock */ /* @@ -648,7 +630,7 @@ again: } } - propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); + propagate_sleepy(node, val, paravirt); preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; @@ -952,21 +934,21 @@ static int pv_yield_prev_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); -static int pv_yield_propagate_owner_set(void *data, u64 val) +static int pv_yield_sleepy_owner_set(void *data, u64 val) { - pv_yield_propagate_owner = !!val; + pv_yield_sleepy_owner = !!val; return 0; } -static int pv_yield_propagate_owner_get(void *data, u64 *val) +static int pv_yield_sleepy_owner_get(void *data, u64 *val) { - *val = pv_yield_propagate_owner; + *val = pv_yield_sleepy_owner; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n"); static int pv_prod_head_set(void *data, u64 val) { @@ -998,7 +980,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); - debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); + debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); } diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a4ab862506..6af97dc0f6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ |