diff options
Diffstat (limited to '')
18 files changed, 8 insertions, 1815 deletions
diff --git a/debian/patches-rt/0001-printk-nbcon-Relocate-32bit-seq-macros.patch b/debian/patches-rt/0001-printk-nbcon-Relocate-32bit-seq-macros.patch deleted file mode 100644 index 5de6d46854..0000000000 --- a/debian/patches-rt/0001-printk-nbcon-Relocate-32bit-seq-macros.patch +++ /dev/null @@ -1,141 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 6 Dec 2023 12:01:56 +0000 -Subject: [PATCH 01/50] printk: nbcon: Relocate 32bit seq macros -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -The macros __seq_to_nbcon_seq() and __nbcon_seq_to_seq() are -used to provide support for atomic handling of sequence numbers -on 32bit systems. Until now this was only used by nbcon.c, -which is why they were located in nbcon.c and include nbcon in -the name. - -In a follow-up commit this functionality is also needed by -printk_ringbuffer. Rather than duplicating the functionality, -relocate the macros to printk_ringbuffer.h. - -Also, since the macros will be no longer nbcon-specific, rename -them to __u64seq_to_ulseq() and __ulseq_to_u64seq(). - -This does not result in any functional change. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/nbcon.c | 41 +++----------------------------------- - kernel/printk/printk_ringbuffer.h | 33 ++++++++++++++++++++++++++++++ - 2 files changed, 37 insertions(+), 37 deletions(-) - ---- a/kernel/printk/nbcon.c -+++ b/kernel/printk/nbcon.c -@@ -140,39 +140,6 @@ static inline bool nbcon_state_try_cmpxc - return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); - } - --#ifdef CONFIG_64BIT -- --#define __seq_to_nbcon_seq(seq) (seq) --#define __nbcon_seq_to_seq(seq) (seq) -- --#else /* CONFIG_64BIT */ -- --#define __seq_to_nbcon_seq(seq) ((u32)seq) -- --static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) --{ -- u64 seq; -- u64 rb_next_seq; -- -- /* -- * The provided sequence is only the lower 32 bits of the ringbuffer -- * sequence. It needs to be expanded to 64bit. Get the next sequence -- * number from the ringbuffer and fold it. -- * -- * Having a 32bit representation in the console is sufficient. -- * If a console ever gets more than 2^31 records behind -- * the ringbuffer then this is the least of the problems. -- * -- * Also the access to the ring buffer is always safe. -- */ -- rb_next_seq = prb_next_seq(prb); -- seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); -- -- return seq; --} -- --#endif /* CONFIG_64BIT */ -- - /** - * nbcon_seq_read - Read the current console sequence - * @con: Console to read the sequence of -@@ -183,7 +150,7 @@ u64 nbcon_seq_read(struct console *con) - { - unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); - -- return __nbcon_seq_to_seq(nbcon_seq); -+ return __ulseq_to_u64seq(prb, nbcon_seq); - } - - /** -@@ -204,7 +171,7 @@ void nbcon_seq_force(struct console *con - */ - u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); - -- atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); -+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); - - /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ - con->seq = 0; -@@ -223,11 +190,11 @@ void nbcon_seq_force(struct console *con - */ - static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) - { -- unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); -+ unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); - struct console *con = ctxt->console; - - if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, -- __seq_to_nbcon_seq(new_seq))) { -+ __u64seq_to_ulseq(new_seq))) { - ctxt->seq = new_seq; - } else { - ctxt->seq = nbcon_seq_read(con); ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -381,4 +381,37 @@ bool prb_read_valid_info(struct printk_r - u64 prb_first_valid_seq(struct printk_ringbuffer *rb); - u64 prb_next_seq(struct printk_ringbuffer *rb); - -+#ifdef CONFIG_64BIT -+ -+#define __u64seq_to_ulseq(u64seq) (u64seq) -+#define __ulseq_to_u64seq(rb, ulseq) (ulseq) -+ -+#else /* CONFIG_64BIT */ -+ -+#define __u64seq_to_ulseq(u64seq) ((u32)u64seq) -+ -+static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) -+{ -+ u64 seq; -+ u64 rb_next_seq; -+ -+ /* -+ * The provided sequence is only the lower 32 bits of the ringbuffer -+ * sequence. It needs to be expanded to 64bit. Get the next sequence -+ * number from the ringbuffer and fold it. -+ * -+ * Having a 32bit representation in the console is sufficient. -+ * If a console ever gets more than 2^31 records behind -+ * the ringbuffer then this is the least of the problems. -+ * -+ * Also the access to the ring buffer is always safe. -+ */ -+ rb_next_seq = prb_next_seq(rb); -+ seq = rb_next_seq - ((u32)rb_next_seq - ulseq); -+ -+ return seq; -+} -+ -+#endif /* CONFIG_64BIT */ -+ - #endif /* _KERNEL_PRINTK_RINGBUFFER_H */ diff --git a/debian/patches-rt/0002-printk-Adjust-mapping-for-32bit-seq-macros.patch b/debian/patches-rt/0002-printk-Adjust-mapping-for-32bit-seq-macros.patch deleted file mode 100644 index 610a2d3151..0000000000 --- a/debian/patches-rt/0002-printk-Adjust-mapping-for-32bit-seq-macros.patch +++ /dev/null @@ -1,71 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 7 Dec 2023 14:15:15 +0000 -Subject: [PATCH 02/50] printk: Adjust mapping for 32bit seq macros -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -Note: This change only applies to 32bit architectures. On 64bit - architectures the macros are NOPs. - -__ulseq_to_u64seq() computes the upper 32 bits of the passed -argument value (@ulseq). The upper bits are derived from a base -value (@rb_next_seq) in a way that assumes @ulseq represents a -64bit number that is less than or equal to @rb_next_seq. - -Until now this mapping has been correct for all call sites. However, -in a follow-up commit, values of @ulseq will be passed in that are -higher than the base value. This requires a change to how the 32bit -value is mapped to a 64bit sequence number. - -Rather than mapping @ulseq such that the base value is the end of a -32bit block, map @ulseq such that the base value is in the middle of -a 32bit block. This allows supporting 31 bits before and after the -base value, which is deemed acceptable for the console sequence -number during runtime. - -Here is an example to illustrate the previous and new mappings. - -For a base value (@rb_next_seq) of 2 2000 0000... - -Before this change the range of possible return values was: - -1 2000 0001 to 2 2000 0000 - -__ulseq_to_u64seq(1fff ffff) => 2 1fff ffff -__ulseq_to_u64seq(2000 0000) => 2 2000 0000 -__ulseq_to_u64seq(2000 0001) => 1 2000 0001 -__ulseq_to_u64seq(9fff ffff) => 1 9fff ffff -__ulseq_to_u64seq(a000 0000) => 1 a000 0000 -__ulseq_to_u64seq(a000 0001) => 1 a000 0001 - -After this change the range of possible return values are: -1 a000 0001 to 2 a000 0000 - -__ulseq_to_u64seq(1fff ffff) => 2 1fff ffff -__ulseq_to_u64seq(2000 0000) => 2 2000 0000 -__ulseq_to_u64seq(2000 0001) => 2 2000 0001 -__ulseq_to_u64seq(9fff ffff) => 2 9fff ffff -__ulseq_to_u64seq(a000 0000) => 2 a000 0000 -__ulseq_to_u64seq(a000 0001) => 1 a000 0001 - -[ john.ogness: Rewrite commit message. ] - -Reported-by: Francesco Dolcini <francesco@dolcini.it> -Reported-by: kernel test robot <oliver.sang@intel.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -407,7 +407,7 @@ static inline u64 __ulseq_to_u64seq(stru - * Also the access to the ring buffer is always safe. - */ - rb_next_seq = prb_next_seq(rb); -- seq = rb_next_seq - ((u32)rb_next_seq - ulseq); -+ seq = rb_next_seq - (s32)((u32)rb_next_seq - ulseq); - - return seq; - } diff --git a/debian/patches-rt/0003-printk-Use-prb_first_seq-as-base-for-32bit-seq-macro.patch b/debian/patches-rt/0003-printk-Use-prb_first_seq-as-base-for-32bit-seq-macro.patch deleted file mode 100644 index f0412681bf..0000000000 --- a/debian/patches-rt/0003-printk-Use-prb_first_seq-as-base-for-32bit-seq-macro.patch +++ /dev/null @@ -1,71 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 22 Nov 2023 16:13:37 +0000 -Subject: [PATCH 03/50] printk: Use prb_first_seq() as base for 32bit seq - macros -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -Note: This change only applies to 32bit architectures. On 64bit - architectures the macros are NOPs. - -Currently prb_next_seq() is used as the base for the 32bit seq -macros __u64seq_to_ulseq() and __ulseq_to_u64seq(). However, in -a follow-up commit, prb_next_seq() will need to make use of the -32bit seq macros. - -Use prb_first_seq() as the base for the 32bit seq macros instead -because it is guaranteed to return 64bit sequence numbers without -relying on any 32bit seq macros. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 2 +- - kernel/printk/printk_ringbuffer.h | 8 ++++---- - 2 files changed, 5 insertions(+), 5 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -1832,7 +1832,7 @@ static int prb_read(struct printk_ringbu - } - - /* Get the sequence number of the tail descriptor. */ --static u64 prb_first_seq(struct printk_ringbuffer *rb) -+u64 prb_first_seq(struct printk_ringbuffer *rb) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; - enum desc_state d_state; ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -378,6 +378,7 @@ bool prb_read_valid(struct printk_ringbu - bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, - struct printk_info *info, unsigned int *line_count); - -+u64 prb_first_seq(struct printk_ringbuffer *rb); - u64 prb_first_valid_seq(struct printk_ringbuffer *rb); - u64 prb_next_seq(struct printk_ringbuffer *rb); - -@@ -392,12 +393,12 @@ u64 prb_next_seq(struct printk_ringbuffe - - static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) - { -+ u64 rb_first_seq = prb_first_seq(rb); - u64 seq; -- u64 rb_next_seq; - - /* - * The provided sequence is only the lower 32 bits of the ringbuffer -- * sequence. It needs to be expanded to 64bit. Get the next sequence -+ * sequence. It needs to be expanded to 64bit. Get the first sequence - * number from the ringbuffer and fold it. - * - * Having a 32bit representation in the console is sufficient. -@@ -406,8 +407,7 @@ static inline u64 __ulseq_to_u64seq(stru - * - * Also the access to the ring buffer is always safe. - */ -- rb_next_seq = prb_next_seq(rb); -- seq = rb_next_seq - (s32)((u32)rb_next_seq - ulseq); -+ seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); - - return seq; - } diff --git a/debian/patches-rt/0004-printk-ringbuffer-Do-not-skip-non-finalized-records-.patch b/debian/patches-rt/0004-printk-ringbuffer-Do-not-skip-non-finalized-records-.patch deleted file mode 100644 index 3e559a9bb6..0000000000 --- a/debian/patches-rt/0004-printk-ringbuffer-Do-not-skip-non-finalized-records-.patch +++ /dev/null @@ -1,304 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 19 Oct 2023 10:32:05 +0000 -Subject: [PATCH 04/50] printk: ringbuffer: Do not skip non-finalized records - with prb_next_seq() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -Commit f244b4dc53e5 ("printk: ringbuffer: Improve -prb_next_seq() performance") introduced an optimization for -prb_next_seq() by using best-effort to track recently finalized -records. However, the order of finalization does not -necessarily match the order of the records. The optimization -changed prb_next_seq() to return inconsistent results, possibly -yielding sequence numbers that are not available to readers -because they are preceded by non-finalized records or they are -not yet visible to the reader CPU. - -Rather than simply best-effort tracking recently finalized -records, force the committing writer to read records and -increment the last "contiguous block" of finalized records. In -order to do this, the sequence number instead of ID must be -stored because ID's cannot be directly compared. - -A new memory barrier pair is introduced to guarantee that a -reader can always read the records up until the sequence number -returned by prb_next_seq() (unless the records have since -been overwritten in the ringbuffer). - -This restores the original functionality of prb_next_seq() -while also keeping the optimization. - -For 32bit systems, only the lower 32 bits of the sequence -number are stored. When reading the value, it is expanded to -the full 64bit sequence number using the 32bit seq macros, -which fold in the value returned by prb_first_seq(). - -Fixes: f244b4dc53e5 ("printk: ringbuffer: Improve prb_next_seq() performance") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 164 ++++++++++++++++++++++++++++---------- - kernel/printk/printk_ringbuffer.h | 4 - 2 files changed, 127 insertions(+), 41 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -6,6 +6,7 @@ - #include <linux/errno.h> - #include <linux/bug.h> - #include "printk_ringbuffer.h" -+#include "internal.h" - - /** - * DOC: printk_ringbuffer overview -@@ -303,6 +304,9 @@ - * - * desc_push_tail:B / desc_reserve:D - * set descriptor reusable (state), then push descriptor tail (id) -+ * -+ * desc_update_last_finalized:A / desc_last_finalized_seq:A -+ * store finalized record, then set new highest finalized sequence number - */ - - #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) -@@ -1442,19 +1446,117 @@ bool prb_reserve_in_last(struct prb_rese - } - - /* -+ * @last_finalized_seq value guarantees that all records up to and including -+ * this sequence number are finalized and can be read. The only exception are -+ * too old records which have already been overwritten. -+ * -+ * It is also guaranteed that @last_finalized_seq only increases. -+ * -+ * Be aware that finalized records following non-finalized records are not -+ * reported because they are not yet available to the reader. For example, -+ * a new record stored via printk() will not be available to a printer if -+ * it follows a record that has not been finalized yet. However, once that -+ * non-finalized record becomes finalized, @last_finalized_seq will be -+ * appropriately updated and the full set of finalized records will be -+ * available to the printer. And since each printk() caller will either -+ * directly print or trigger deferred printing of all available unprinted -+ * records, all printk() messages will get printed. -+ */ -+static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long ulseq; -+ -+ /* -+ * Guarantee the sequence number is loaded before loading the -+ * associated record in order to guarantee that the record can be -+ * seen by this CPU. This pairs with desc_update_last_finalized:A. -+ */ -+ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq -+ ); /* LMM(desc_last_finalized_seq:A) */ -+ -+ return __ulseq_to_u64seq(rb, ulseq); -+} -+ -+static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, -+ struct printk_record *r, unsigned int *line_count); -+ -+/* -+ * Check if there are records directly following @last_finalized_seq that are -+ * finalized. If so, update @last_finalized_seq to the latest of these -+ * records. It is not allowed to skip over records that are not yet finalized. -+ */ -+static void desc_update_last_finalized(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ u64 old_seq = desc_last_finalized_seq(rb); -+ unsigned long oldval; -+ unsigned long newval; -+ u64 finalized_seq; -+ u64 try_seq; -+ -+try_again: -+ finalized_seq = old_seq; -+ try_seq = finalized_seq + 1; -+ -+ /* Try to find later finalized records. */ -+ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { -+ finalized_seq = try_seq; -+ try_seq++; -+ } -+ -+ /* No update needed if no later finalized record was found. */ -+ if (finalized_seq == old_seq) -+ return; -+ -+ oldval = __u64seq_to_ulseq(old_seq); -+ newval = __u64seq_to_ulseq(finalized_seq); -+ -+ /* -+ * Set the sequence number of a later finalized record that has been -+ * seen. -+ * -+ * Guarantee the record data is visible to other CPUs before storing -+ * its sequence number. This pairs with desc_last_finalized_seq:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_last_finalized_seq:A reads from -+ * desc_update_last_finalized:A, then desc_read:A reads from -+ * _prb_commit:B. -+ * -+ * Relies on: -+ * -+ * RELEASE from _prb_commit:B to desc_update_last_finalized:A -+ * matching -+ * ACQUIRE from desc_last_finalized_seq:A to desc_read:A -+ * -+ * Note: _prb_commit:B and desc_update_last_finalized:A can be -+ * different CPUs. However, the desc_update_last_finalized:A -+ * CPU (which performs the release) must have previously seen -+ * _prb_commit:B. -+ */ -+ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, -+ &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ -+ old_seq = __ulseq_to_u64seq(rb, oldval); -+ goto try_again; -+ } -+} -+ -+/* - * Attempt to finalize a specified descriptor. If this fails, the descriptor - * is either already final or it will finalize itself when the writer commits. - */ --static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) -+static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) - { -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; - unsigned long prev_state_val = DESC_SV(id, desc_committed); - struct prb_desc *d = to_desc(desc_ring, id); - -- atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, -- DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ -- -- /* Best effort to remember the last finalized @id. */ -- atomic_long_set(&desc_ring->last_finalized_id, id); -+ if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, -+ DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ -+ desc_update_last_finalized(rb); -+ } - } - - /** -@@ -1550,7 +1652,7 @@ bool prb_reserve(struct prb_reserved_ent - * readers. (For seq==0 there is no previous descriptor.) - */ - if (info->seq > 0) -- desc_make_final(desc_ring, DESC_ID(id - 1)); -+ desc_make_final(rb, DESC_ID(id - 1)); - - r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); - /* If text data allocation fails, a data-less record is committed. */ -@@ -1643,7 +1745,7 @@ void prb_commit(struct prb_reserved_entr - */ - head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ - if (head_id != e->id) -- desc_make_final(desc_ring, e->id); -+ desc_make_final(e->rb, e->id); - } - - /** -@@ -1663,12 +1765,9 @@ void prb_commit(struct prb_reserved_entr - */ - void prb_final_commit(struct prb_reserved_entry *e) - { -- struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -- - _prb_commit(e, desc_finalized); - -- /* Best effort to remember the last finalized @id. */ -- atomic_long_set(&desc_ring->last_finalized_id, e->id); -+ desc_update_last_finalized(e->rb); - } - - /* -@@ -2008,7 +2107,9 @@ u64 prb_first_valid_seq(struct printk_ri - * newest sequence number available to readers will be. - * - * This provides readers a sequence number to jump to if all currently -- * available records should be skipped. -+ * available records should be skipped. It is guaranteed that all records -+ * previous to the returned value have been finalized and are (or were) -+ * available to the reader. - * - * Context: Any context. - * Return: The sequence number of the next newest (not yet available) record -@@ -2016,34 +2117,19 @@ u64 prb_first_valid_seq(struct printk_ri - */ - u64 prb_next_seq(struct printk_ringbuffer *rb) - { -- struct prb_desc_ring *desc_ring = &rb->desc_ring; -- enum desc_state d_state; -- unsigned long id; - u64 seq; - -- /* Check if the cached @id still points to a valid @seq. */ -- id = atomic_long_read(&desc_ring->last_finalized_id); -- d_state = desc_read(desc_ring, id, NULL, &seq, NULL); -+ seq = desc_last_finalized_seq(rb); - -- if (d_state == desc_finalized || d_state == desc_reusable) { -- /* -- * Begin searching after the last finalized record. -- * -- * On 0, the search must begin at 0 because of hack#2 -- * of the bootstrapping phase it is not known if a -- * record at index 0 exists. -- */ -- if (seq != 0) -- seq++; -- } else { -- /* -- * The information about the last finalized sequence number -- * has gone. It should happen only when there is a flood of -- * new messages and the ringbuffer is rapidly recycled. -- * Give up and start from the beginning. -- */ -- seq = 0; -- } -+ /* -+ * Begin searching after the last finalized record. -+ * -+ * On 0, the search must begin at 0 because of hack#2 -+ * of the bootstrapping phase it is not known if a -+ * record at index 0 exists. -+ */ -+ if (seq != 0) -+ seq++; - - /* - * The information about the last finalized @seq might be inaccurate. -@@ -2085,7 +2171,7 @@ void prb_init(struct printk_ringbuffer * - rb->desc_ring.infos = infos; - atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); - atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); -- atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); -+ atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); - - rb->text_data_ring.size_bits = textbits; - rb->text_data_ring.data = text_buf; ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -75,7 +75,7 @@ struct prb_desc_ring { - struct printk_info *infos; - atomic_long_t head_id; - atomic_long_t tail_id; -- atomic_long_t last_finalized_id; -+ atomic_long_t last_finalized_seq; - }; - - /* -@@ -259,7 +259,7 @@ static struct printk_ringbuffer name = { - .infos = &_##name##_infos[0], \ - .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ - .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -- .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ .last_finalized_seq = ATOMIC_INIT(0), \ - }, \ - .text_data_ring = { \ - .size_bits = (avgtextbits) + (descbits), \ diff --git a/debian/patches-rt/0007-printk-Add-this_cpu_in_panic.patch b/debian/patches-rt/0007-printk-Add-this_cpu_in_panic.patch deleted file mode 100644 index 4168b8ed07..0000000000 --- a/debian/patches-rt/0007-printk-Add-this_cpu_in_panic.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 13 Oct 2023 14:30:49 +0000 -Subject: [PATCH 07/50] printk: Add this_cpu_in_panic() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -There is already panic_in_progress() and other_cpu_in_panic(), -but checking if the current CPU is the panic CPU must still be -open coded. - -Add this_cpu_in_panic() to complete the set. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/internal.h | 1 + - kernel/printk/printk.c | 43 +++++++++++++++++++++++-------------------- - 2 files changed, 24 insertions(+), 20 deletions(-) - ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -130,6 +130,7 @@ struct printk_message { - }; - - bool other_cpu_in_panic(void); -+bool this_cpu_in_panic(void); - bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - bool is_extended, bool may_supress); - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -347,6 +347,29 @@ static bool panic_in_progress(void) - return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); - } - -+/* Return true if a panic is in progress on the current CPU. */ -+bool this_cpu_in_panic(void) -+{ -+ /* -+ * We can use raw_smp_processor_id() here because it is impossible for -+ * the task to be migrated to the panic_cpu, or away from it. If -+ * panic_cpu has already been set, and we're not currently executing on -+ * that CPU, then we never will be. -+ */ -+ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); -+} -+ -+/* -+ * Return true if a panic is in progress on a remote CPU. -+ * -+ * On true, the local CPU should immediately release any printing resources -+ * that may be needed by the panic CPU. -+ */ -+bool other_cpu_in_panic(void) -+{ -+ return (panic_in_progress() && !this_cpu_in_panic()); -+} -+ - /* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's -@@ -2600,26 +2623,6 @@ static int console_cpu_notify(unsigned i - return 0; - } - --/* -- * Return true if a panic is in progress on a remote CPU. -- * -- * On true, the local CPU should immediately release any printing resources -- * that may be needed by the panic CPU. -- */ --bool other_cpu_in_panic(void) --{ -- if (!panic_in_progress()) -- return false; -- -- /* -- * We can use raw_smp_processor_id() here because it is impossible for -- * the task to be migrated to the panic_cpu, or away from it. If -- * panic_cpu has already been set, and we're not currently executing on -- * that CPU, then we never will be. -- */ -- return atomic_read(&panic_cpu) != raw_smp_processor_id(); --} -- - /** - * console_lock - block the console subsystem from printing - * diff --git a/debian/patches-rt/0008-printk-ringbuffer-Cleanup-reader-terminology.patch b/debian/patches-rt/0008-printk-ringbuffer-Cleanup-reader-terminology.patch deleted file mode 100644 index cfd6ca7a28..0000000000 --- a/debian/patches-rt/0008-printk-ringbuffer-Cleanup-reader-terminology.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 6 Nov 2023 15:01:58 +0000 -Subject: [PATCH 08/50] printk: ringbuffer: Cleanup reader terminology -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -With the lockless ringbuffer, it is allowed that multiple -CPUs/contexts write simultaneously into the buffer. This creates -an ambiguity as some writers will finalize sooner. - -The documentation for the prb_read functions is not clear as it -refers to "not yet written" and "no data available". Clarify the -return values and language to be in terms of the reader: records -available for reading. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -1987,11 +1987,13 @@ u64 prb_first_seq(struct printk_ringbuff - } - - /* -- * Non-blocking read of a record. Updates @seq to the last finalized record -- * (which may have no data available). -+ * Non-blocking read of a record. - * -- * See the description of prb_read_valid() and prb_read_valid_info() -- * for details. -+ * On success @seq is updated to the record that was read and (if provided) -+ * @r and @line_count will contain the read/calculated data. -+ * -+ * On failure @seq is updated to a record that is not yet available to the -+ * reader, but it will be the next record available to the reader. - */ - static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, - struct printk_record *r, unsigned int *line_count) -@@ -2010,7 +2012,7 @@ static bool _prb_read_valid(struct print - *seq = tail_seq; - - } else if (err == -ENOENT) { -- /* Record exists, but no data available. Skip. */ -+ /* Record exists, but the data was lost. Skip. */ - (*seq)++; - - } else { -@@ -2043,7 +2045,7 @@ static bool _prb_read_valid(struct print - * On success, the reader must check r->info.seq to see which record was - * actually read. This allows the reader to detect dropped records. - * -- * Failure means @seq refers to a not yet written record. -+ * Failure means @seq refers to a record not yet available to the reader. - */ - bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, - struct printk_record *r) -@@ -2073,7 +2075,7 @@ bool prb_read_valid(struct printk_ringbu - * On success, the reader must check info->seq to see which record meta data - * was actually read. This allows the reader to detect dropped records. - * -- * Failure means @seq refers to a not yet written record. -+ * Failure means @seq refers to a record not yet available to the reader. - */ - bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, - struct printk_info *info, unsigned int *line_count) diff --git a/debian/patches-rt/0009-printk-Wait-for-all-reserved-records-with-pr_flush.patch b/debian/patches-rt/0009-printk-Wait-for-all-reserved-records-with-pr_flush.patch deleted file mode 100644 index 3ae222c891..0000000000 --- a/debian/patches-rt/0009-printk-Wait-for-all-reserved-records-with-pr_flush.patch +++ /dev/null @@ -1,170 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 6 Nov 2023 14:59:55 +0000 -Subject: [PATCH 09/50] printk: Wait for all reserved records with pr_flush() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -Currently pr_flush() will only wait for records that were -available to readers at the time of the call (using -prb_next_seq()). But there may be more records (non-finalized) -that have following finalized records. pr_flush() should wait -for these to print as well. Particularly because any trailing -finalized records may be the messages that the calling context -wants to ensure are printed. - -Add a new ringbuffer function prb_next_reserve_seq() to return -the sequence number following the most recently reserved record. -This guarantees that pr_flush() will wait until all current -printk() messages (completed or in progress) have been printed. - -Fixes: 3b604ca81202 ("printk: add pr_flush()") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 2 - kernel/printk/printk_ringbuffer.c | 113 ++++++++++++++++++++++++++++++++++++++ - kernel/printk/printk_ringbuffer.h | 1 - 3 files changed, 115 insertions(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3763,7 +3763,7 @@ static bool __pr_flush(struct console *c - - might_sleep(); - -- seq = prb_next_seq(prb); -+ seq = prb_next_reserve_seq(prb); - - /* Flush the consoles so that records up to @seq are printed. */ - console_lock(); ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -1986,6 +1986,119 @@ u64 prb_first_seq(struct printk_ringbuff - return seq; - } - -+/** -+ * prb_next_reserve_seq() - Get the sequence number after the most recently -+ * reserved record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what sequence -+ * number will be assigned to the next reserved record. -+ * -+ * Note that depending on the situation, this value can be equal to or -+ * higher than the sequence number returned by prb_next_seq(). -+ * -+ * Context: Any context. -+ * Return: The sequence number that will be assigned to the next record -+ * reserved. -+ */ -+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long last_finalized_id; -+ atomic_long_t *state_var; -+ u64 last_finalized_seq; -+ unsigned long head_id; -+ struct prb_desc desc; -+ unsigned long diff; -+ struct prb_desc *d; -+ int err; -+ -+ /* -+ * It may not be possible to read a sequence number for @head_id. -+ * So the ID of @last_finailzed_seq is used to calculate what the -+ * sequence number of @head_id will be. -+ */ -+ -+try_again: -+ last_finalized_seq = desc_last_finalized_seq(rb); -+ -+ /* -+ * @head_id is loaded after @last_finalized_seq to ensure that it is -+ * at or beyond @last_finalized_seq. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_last_finalized_seq:A reads from -+ * desc_update_last_finalized:A, then -+ * prb_next_reserve_seq:A reads from desc_reserve:D. -+ * -+ * Relies on: -+ * -+ * RELEASE from desc_reserve:D to desc_update_last_finalized:A -+ * matching -+ * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A -+ * -+ * Note: desc_reserve:D and desc_update_last_finalized:A can be -+ * different CPUs. However, the desc_update_last_finalized:A CPU -+ * (which performs the release) must have previously seen -+ * desc_read:C, which implies desc_reserve:D can be seen. -+ */ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ -+ -+ d = to_desc(desc_ring, last_finalized_seq); -+ state_var = &d->state_var; -+ -+ /* Extract the ID, used to specify the descriptor to read. */ -+ last_finalized_id = DESC_ID(atomic_long_read(state_var)); -+ -+ /* Ensure @last_finalized_id is correct. */ -+ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); -+ -+ if (err == -EINVAL) { -+ if (last_finalized_seq == 0) { -+ /* -+ * @last_finalized_seq still contains its initial -+ * value. Probably no record has been finalized yet. -+ * This means the ringbuffer is not yet full and the -+ * @head_id value can be used directly (subtracting -+ * off the id value corresponding to seq=0). -+ */ -+ -+ /* -+ * Because of hack#2 of the bootstrapping phase, the -+ * @head_id initial value must be handled separately. -+ */ -+ if (head_id == DESC0_ID(desc_ring->count_bits)) -+ return 0; -+ -+ /* -+ * The @head_id is initialized such that the first -+ * increment will yield the first record (seq=0). -+ * Therefore use the initial value +1 as the base to -+ * subtract from @head_id. -+ */ -+ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; -+ } else { -+ /* Record must have been overwritten. Try again. */ -+ goto try_again; -+ } -+ } -+ -+ /* -+ * @diff is the number of records beyond the last record available -+ * to readers. -+ */ -+ diff = head_id - last_finalized_id; -+ -+ /* -+ * @head_id points to the most recently reserved record, but this -+ * function returns the sequence number that will be assigned to the -+ * next (not yet reserved) record. Thus +1 is needed. -+ */ -+ return (last_finalized_seq + diff + 1); -+} -+ - /* - * Non-blocking read of a record. - * ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -395,6 +395,7 @@ bool prb_read_valid_info(struct printk_r - u64 prb_first_seq(struct printk_ringbuffer *rb); - u64 prb_first_valid_seq(struct printk_ringbuffer *rb); - u64 prb_next_seq(struct printk_ringbuffer *rb); -+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); - - #ifdef CONFIG_64BIT - diff --git a/debian/patches-rt/0010-printk-ringbuffer-Skip-non-finalized-records-in-pani.patch b/debian/patches-rt/0010-printk-ringbuffer-Skip-non-finalized-records-in-pani.patch deleted file mode 100644 index a92123216f..0000000000 --- a/debian/patches-rt/0010-printk-ringbuffer-Skip-non-finalized-records-in-pani.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 13 Oct 2023 10:23:11 +0000 -Subject: [PATCH 10/50] printk: ringbuffer: Skip non-finalized records in panic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -Normally a reader will stop once reaching a non-finalized -record. However, when a panic happens, writers from other CPUs -(or an interrupted context on the panic CPU) may have been -writing a record and were unable to finalize it. The panic CPU -will reserve/commit/finalize its panic records, but these will -be located after the non-finalized records. This results in -panic() not flushing the panic messages. - -Extend _prb_read_valid() to skip over non-finalized records if -on the panic CPU. - -Fixes: 896fbe20b4e2 ("printk: use the lockless ringbuffer") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 28 ++++++++++++++++++++++++++-- - 1 file changed, 26 insertions(+), 2 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -2107,6 +2107,10 @@ u64 prb_next_reserve_seq(struct printk_r - * - * On failure @seq is updated to a record that is not yet available to the - * reader, but it will be the next record available to the reader. -+ * -+ * Note: When the current CPU is in panic, this function will skip over any -+ * non-existent/non-finalized records in order to allow the panic CPU -+ * to print any and all records that have been finalized. - */ - static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, - struct printk_record *r, unsigned int *line_count) -@@ -2129,8 +2133,28 @@ static bool _prb_read_valid(struct print - (*seq)++; - - } else { -- /* Non-existent/non-finalized record. Must stop. */ -- return false; -+ /* -+ * Non-existent/non-finalized record. Must stop. -+ * -+ * For panic situations it cannot be expected that -+ * non-finalized records will become finalized. But -+ * there may be other finalized records beyond that -+ * need to be printed for a panic situation. If this -+ * is the panic CPU, skip this -+ * non-existent/non-finalized record unless it is -+ * at or beyond the head, in which case it is not -+ * possible to continue. -+ * -+ * Note that new messages printed on panic CPU are -+ * finalized when we are here. The only exception -+ * might be the last message without trailing newline. -+ * But it would have the sequence number returned -+ * by "prb_next_reserve_seq() - 1". -+ */ -+ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) -+ (*seq)++; -+ else -+ return false; - } - } - diff --git a/debian/patches-rt/0012-printk-Disable-passing-console-lock-owner-completely.patch b/debian/patches-rt/0012-printk-Disable-passing-console-lock-owner-completely.patch deleted file mode 100644 index adc25ac39d..0000000000 --- a/debian/patches-rt/0012-printk-Disable-passing-console-lock-owner-completely.patch +++ /dev/null @@ -1,107 +0,0 @@ -From: Petr Mladek <pmladek@suse.com> -Date: Fri, 13 Oct 2023 14:12:05 +0000 -Subject: [PATCH 12/50] printk: Disable passing console lock owner completely - during panic() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.7/older/patches-6.7-rt6.tar.xz - -The commit d51507098ff91 ("printk: disable optimistic spin -during panic") added checks to avoid becoming a console waiter -if a panic is in progress. - -However, the transition to panic can occur while there is -already a waiter. The current owner should not pass the lock to -the waiter because it might get stopped or blocked anytime. - -Also the panic context might pass the console lock owner to an -already stopped waiter by mistake. It might happen when -console_flush_on_panic() ignores the current lock owner, for -example: - -CPU0 CPU1 ----- ---- -console_lock_spinning_enable() - console_trylock_spinning() - [CPU1 now console waiter] -NMI: panic() - panic_other_cpus_shutdown() - [stopped as console waiter] - console_flush_on_panic() - console_lock_spinning_enable() - [print 1 record] - console_lock_spinning_disable_and_check() - [handover to stopped CPU1] - -This results in panic() not flushing the panic messages. - -Fix these problems by disabling all spinning operations -completely during panic(). - -Another advantage is that it prevents possible deadlocks caused -by "console_owner_lock". The panic() context does not need to -take it any longer. The lockless checks are safe because the -functions become NOPs when they see the panic in progress. All -operations manipulating the state are still synchronized by the -lock even when non-panic CPUs would notice the panic -synchronously. - -The current owner might stay spinning. But non-panic() CPUs -would get stopped anyway and the panic context will never start -spinning. - -Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") -Signed-off-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1869,10 +1869,23 @@ static bool console_waiter; - */ - static void console_lock_spinning_enable(void) - { -+ /* -+ * Do not use spinning in panic(). The panic CPU wants to keep the lock. -+ * Non-panic CPUs abandon the flush anyway. -+ * -+ * Just keep the lockdep annotation. The panic-CPU should avoid -+ * taking console_owner_lock because it might cause a deadlock. -+ * This looks like the easiest way how to prevent false lockdep -+ * reports without handling races a lockless way. -+ */ -+ if (panic_in_progress()) -+ goto lockdep; -+ - raw_spin_lock(&console_owner_lock); - console_owner = current; - raw_spin_unlock(&console_owner_lock); - -+lockdep: - /* The waiter may spin on us after setting console_owner */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); - } -@@ -1897,6 +1910,22 @@ static int console_lock_spinning_disable - { - int waiter; - -+ /* -+ * Ignore spinning waiters during panic() because they might get stopped -+ * or blocked at any time, -+ * -+ * It is safe because nobody is allowed to start spinning during panic -+ * in the first place. If there has been a waiter then non panic CPUs -+ * might stay spinning. They would get stopped anyway. The panic context -+ * will never start spinning and an interrupted spin on panic CPU will -+ * never continue. -+ */ -+ if (panic_in_progress()) { -+ /* Keep lockdep happy. */ -+ spin_release(&console_owner_dep_map, _THIS_IP_); -+ return 0; -+ } -+ - raw_spin_lock(&console_owner_lock); - waiter = READ_ONCE(console_waiter); - console_owner = NULL; diff --git a/debian/patches-rt/ARM__Allow_to_enable_RT.patch b/debian/patches-rt/ARM__Allow_to_enable_RT.patch index 296114d458..b5749a71ea 100644 --- a/debian/patches-rt/ARM__Allow_to_enable_RT.patch +++ b/debian/patches-rt/ARM__Allow_to_enable_RT.patch @@ -17,15 +17,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -35,6 +35,7 @@ config ARM - select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 +@@ -36,6 +36,7 @@ select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE + select ARCH_SUPPORTS_PER_VMA_LOCK + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST -@@ -119,6 +120,7 @@ config ARM +@@ -120,6 +121,7 @@ select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP diff --git a/debian/patches-rt/series b/debian/patches-rt/series index 284e04d142..f3d94c14e2 100644 --- a/debian/patches-rt/series +++ b/debian/patches-rt/series @@ -44,18 +44,9 @@ preempt-Put-preempt_enable-within-an-instrumentation.patch ########################################################################### # John's printk queue ########################################################################### -0001-printk-nbcon-Relocate-32bit-seq-macros.patch -0002-printk-Adjust-mapping-for-32bit-seq-macros.patch -0003-printk-Use-prb_first_seq-as-base-for-32bit-seq-macro.patch -0004-printk-ringbuffer-Do-not-skip-non-finalized-records-.patch 0005-printk-ringbuffer-Clarify-special-lpos-values.patch 0006-printk-For-suppress_panic_printk-check-for-other-CPU.patch -0007-printk-Add-this_cpu_in_panic.patch -0008-printk-ringbuffer-Cleanup-reader-terminology.patch -0009-printk-Wait-for-all-reserved-records-with-pr_flush.patch -0010-printk-ringbuffer-Skip-non-finalized-records-in-pani.patch 0011-printk-ringbuffer-Consider-committed-as-finalized-in.patch -0012-printk-Disable-passing-console-lock-owner-completely.patch 0013-printk-Avoid-non-panic-CPUs-writing-to-ringbuffer.patch 0014-panic-Flush-kernel-log-buffer-at-the-end.patch 0015-printk-Consider-nbcon-boot-consoles-on-seq-init.patch diff --git a/debian/patches/bugfix/x86/Documentation-hw-vuln-Add-documentation-for-RFDS.patch b/debian/patches/bugfix/x86/Documentation-hw-vuln-Add-documentation-for-RFDS.patch deleted file mode 100644 index 781be97097..0000000000 --- a/debian/patches/bugfix/x86/Documentation-hw-vuln-Add-documentation-for-RFDS.patch +++ /dev/null @@ -1,140 +0,0 @@ -From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Date: Mon, 11 Mar 2024 12:29:43 -0700 -Subject: Documentation/hw-vuln: Add documentation for RFDS -Origin: https://git.kernel.org/linus/4e42765d1be01111df0c0275bbaf1db1acef346e - -Add the documentation for transient execution vulnerability Register -File Data Sampling (RFDS) that affects Intel Atom CPUs. - -Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> ---- - Documentation/admin-guide/hw-vuln/index.rst | 1 + - .../hw-vuln/reg-file-data-sampling.rst | 104 ++++++++++++++++++ - 2 files changed, 105 insertions(+) - create mode 100644 Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst - -diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst -index de99caabf65a..ff0b440ef2dc 100644 ---- a/Documentation/admin-guide/hw-vuln/index.rst -+++ b/Documentation/admin-guide/hw-vuln/index.rst -@@ -21,3 +21,4 @@ are configurable at compile, boot or run time. - cross-thread-rsb - srso - gather_data_sampling -+ reg-file-data-sampling -diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst -new file mode 100644 -index 000000000000..0585d02b9a6c ---- /dev/null -+++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst -@@ -0,0 +1,104 @@ -+================================== -+Register File Data Sampling (RFDS) -+================================== -+ -+Register File Data Sampling (RFDS) is a microarchitectural vulnerability that -+only affects Intel Atom parts(also branded as E-cores). RFDS may allow -+a malicious actor to infer data values previously used in floating point -+registers, vector registers, or integer registers. RFDS does not provide the -+ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. -+ -+Affected Processors -+=================== -+Below is the list of affected Intel processors [#f1]_: -+ -+ =================== ============ -+ Common name Family_Model -+ =================== ============ -+ ATOM_GOLDMONT 06_5CH -+ ATOM_GOLDMONT_D 06_5FH -+ ATOM_GOLDMONT_PLUS 06_7AH -+ ATOM_TREMONT_D 06_86H -+ ATOM_TREMONT 06_96H -+ ALDERLAKE 06_97H -+ ALDERLAKE_L 06_9AH -+ ATOM_TREMONT_L 06_9CH -+ RAPTORLAKE 06_B7H -+ RAPTORLAKE_P 06_BAH -+ ATOM_GRACEMONT 06_BEH -+ RAPTORLAKE_S 06_BFH -+ =================== ============ -+ -+As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and -+RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as -+vulnerable in Linux because they share the same family/model with an affected -+part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or -+CPUID.HYBRID. This information could be used to distinguish between the -+affected and unaffected parts, but it is deemed not worth adding complexity as -+the reporting is fixed automatically when these parts enumerate RFDS_NO. -+ -+Mitigation -+========== -+Intel released a microcode update that enables software to clear sensitive -+information using the VERW instruction. Like MDS, RFDS deploys the same -+mitigation strategy to force the CPU to clear the affected buffers before an -+attacker can extract the secrets. This is achieved by using the otherwise -+unused and obsolete VERW instruction in combination with a microcode update. -+The microcode clears the affected CPU buffers when the VERW instruction is -+executed. -+ -+Mitigation points -+----------------- -+VERW is executed by the kernel before returning to user space, and by KVM -+before VMentry. None of the affected cores support SMT, so VERW is not required -+at C-state transitions. -+ -+New bits in IA32_ARCH_CAPABILITIES -+---------------------------------- -+Newer processors and microcode update on existing affected processors added new -+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate -+vulnerability and mitigation capability: -+ -+- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. -+- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the -+ microcode that clears the affected buffers on VERW execution. -+ -+Mitigation control on the kernel command line -+--------------------------------------------- -+The kernel command line allows to control RFDS mitigation at boot time with the -+parameter "reg_file_data_sampling=". The valid arguments are: -+ -+ ========== ================================================================= -+ on If the CPU is vulnerable, enable mitigation; CPU buffer clearing -+ on exit to userspace and before entering a VM. -+ off Disables mitigation. -+ ========== ================================================================= -+ -+Mitigation default is selected by CONFIG_MITIGATION_RFDS. -+ -+Mitigation status information -+----------------------------- -+The Linux kernel provides a sysfs interface to enumerate the current -+vulnerability status of the system: whether the system is vulnerable, and -+which mitigations are active. The relevant sysfs file is: -+ -+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling -+ -+The possible values in this file are: -+ -+ .. list-table:: -+ -+ * - 'Not affected' -+ - The processor is not vulnerable -+ * - 'Vulnerable' -+ - The processor is vulnerable, but no mitigation enabled -+ * - 'Vulnerable: No microcode' -+ - The processor is vulnerable but microcode is not updated. -+ * - 'Mitigation: Clear Register File' -+ - The processor is vulnerable and the CPU buffer clearing mitigation is -+ enabled. -+ -+References -+---------- -+.. [#f1] Affected Processors -+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html --- -2.43.0 - diff --git a/debian/patches/bugfix/x86/KVM-x86-Export-RFDS_NO-and-RFDS_CLEAR-to-guests.patch b/debian/patches/bugfix/x86/KVM-x86-Export-RFDS_NO-and-RFDS_CLEAR-to-guests.patch deleted file mode 100644 index 13a5c96a49..0000000000 --- a/debian/patches/bugfix/x86/KVM-x86-Export-RFDS_NO-and-RFDS_CLEAR-to-guests.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Date: Mon, 11 Mar 2024 12:29:43 -0700 -Subject: KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests -Origin: https://git.kernel.org/linus/2a0180129d726a4b953232175857d442651b55a0 - -Mitigation for RFDS requires RFDS_CLEAR capability which is enumerated -by MSR_IA32_ARCH_CAPABILITIES bit 27. If the host has it set, export it -to guests so that they can deploy the mitigation. - -RFDS_NO indicates that the system is not vulnerable to RFDS, export it -to guests so that they don't deploy the mitigation unnecessarily. When -the host is not affected by X86_BUG_RFDS, but has RFDS_NO=0, synthesize -RFDS_NO to the guest. - -Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> ---- - arch/x86/kvm/x86.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 48a61d283406..68fdf3ba031a 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -1623,7 +1623,8 @@ static bool kvm_is_immutable_feature_msr(u32 msr) - ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ - ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ - ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ -- ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) -+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ -+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) - - static u64 kvm_get_arch_capabilities(void) - { -@@ -1655,6 +1656,8 @@ static u64 kvm_get_arch_capabilities(void) - data |= ARCH_CAP_SSB_NO; - if (!boot_cpu_has_bug(X86_BUG_MDS)) - data |= ARCH_CAP_MDS_NO; -+ if (!boot_cpu_has_bug(X86_BUG_RFDS)) -+ data |= ARCH_CAP_RFDS_NO; - - if (!boot_cpu_has(X86_FEATURE_RTM)) { - /* --- -2.43.0 - diff --git a/debian/patches/bugfix/x86/platform-x86-p2sb-On-Goldmont-only-cache-P2SB-and-SP.patch b/debian/patches/bugfix/x86/platform-x86-p2sb-On-Goldmont-only-cache-P2SB-and-SP.patch deleted file mode 100644 index 50e5f8dc5d..0000000000 --- a/debian/patches/bugfix/x86/platform-x86-p2sb-On-Goldmont-only-cache-P2SB-and-SP.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Hans de Goede <hdegoede@redhat.com> -Date: Mon, 4 Mar 2024 14:43:55 +0100 -Subject: platform/x86: p2sb: On Goldmont only cache P2SB and SPI devfn BAR -Origin: https://git.kernel.org/linus/aec7d25b497ce4a8d044e9496de0aa433f7f8f06 -Bug-Debian: https://bugs.debian.org/1065320 - -On Goldmont p2sb_bar() only ever gets called for 2 devices, the actual P2SB -devfn 13,0 and the SPI controller which is part of the P2SB, devfn 13,2. - -But the current p2sb code tries to cache BAR0 info for all of -devfn 13,0 to 13,7 . This involves calling pci_scan_single_device() -for device 13 functions 0-7 and the hw does not seem to like -pci_scan_single_device() getting called for some of the other hidden -devices. E.g. on an ASUS VivoBook D540NV-GQ065T this leads to continuous -ACPI errors leading to high CPU usage. - -Fix this by only caching BAR0 info and thus only calling -pci_scan_single_device() for the P2SB and the SPI controller. - -Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") -Reported-by: Danil Rybakov <danilrybakov249@gmail.com> -Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218531 -Tested-by: Danil Rybakov <danilrybakov249@gmail.com> -Signed-off-by: Hans de Goede <hdegoede@redhat.com> -Link: https://lore.kernel.org/r/20240304134356.305375-2-hdegoede@redhat.com ---- - drivers/platform/x86/p2sb.c | 25 +++++++++---------------- - 1 file changed, 9 insertions(+), 16 deletions(-) - -diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c -index 6bd14d0132db..3d66e1d4eb1f 100644 ---- a/drivers/platform/x86/p2sb.c -+++ b/drivers/platform/x86/p2sb.c -@@ -20,9 +20,11 @@ - #define P2SBC_HIDE BIT(8) - - #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) -+#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) -+#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) - - static const struct x86_cpu_id p2sb_cpu_ids[] = { -- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), -+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), - {} - }; - -@@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) - - static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) - { -- unsigned int slot, fn; -- -- if (PCI_FUNC(devfn) == 0) { -- /* -- * When function number of the P2SB device is zero, scan it and -- * other function numbers, and if devices are available, cache -- * their BAR0s. -- */ -- slot = PCI_SLOT(devfn); -- for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) -- p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); -- } else { -- /* Scan the P2SB device and cache its BAR0 */ -- p2sb_scan_and_cache_devfn(bus, devfn); -- } -+ /* Scan the P2SB device and cache its BAR0 */ -+ p2sb_scan_and_cache_devfn(bus, devfn); -+ -+ /* On Goldmont p2sb_bar() also gets called for the SPI controller */ -+ if (devfn == P2SB_DEVFN_GOLDMONT) -+ p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); - - if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) - return -ENOENT; --- -2.43.0 - diff --git a/debian/patches/bugfix/x86/x86-mmio-Disable-KVM-mitigation-when-X86_FEATURE_CLE.patch b/debian/patches/bugfix/x86/x86-mmio-Disable-KVM-mitigation-when-X86_FEATURE_CLE.patch deleted file mode 100644 index 313064d2bc..0000000000 --- a/debian/patches/bugfix/x86/x86-mmio-Disable-KVM-mitigation-when-X86_FEATURE_CLE.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Date: Mon, 11 Mar 2024 12:29:43 -0700 -Subject: x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is - set -Origin: https://git.kernel.org/linus/e95df4ec0c0c9791941f112db699fae794b9862a - -Currently MMIO Stale Data mitigation for CPUs not affected by MDS/TAA is -to only deploy VERW at VMentry by enabling mmio_stale_data_clear static -branch. No mitigation is needed for kernel->user transitions. If such -CPUs are also affected by RFDS, its mitigation may set -X86_FEATURE_CLEAR_CPU_BUF to deploy VERW at kernel->user and VMentry. -This could result in duplicate VERW at VMentry. - -Fix this by disabling mmio_stale_data_clear static branch when -X86_FEATURE_CLEAR_CPU_BUF is enabled. - -Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> ---- - arch/x86/kernel/cpu/bugs.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 48d049cd74e7..cd6ac89c1a0d 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -422,6 +422,13 @@ static void __init mmio_select_mitigation(void) - if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && - boot_cpu_has(X86_FEATURE_RTM))) - setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); -+ -+ /* -+ * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based -+ * mitigations, disable KVM-only mitigation in that case. -+ */ -+ if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) -+ static_branch_disable(&mmio_stale_data_clear); - else - static_branch_enable(&mmio_stale_data_clear); - -@@ -498,8 +505,11 @@ static void __init md_clear_update_mitigation(void) - taa_mitigation = TAA_MITIGATION_VERW; - taa_select_mitigation(); - } -- if (mmio_mitigation == MMIO_MITIGATION_OFF && -- boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { -+ /* -+ * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear -+ * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. -+ */ -+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { - mmio_mitigation = MMIO_MITIGATION_VERW; - mmio_select_mitigation(); - } --- -2.43.0 - diff --git a/debian/patches/bugfix/x86/x86-rfds-Mitigate-Register-File-Data-Sampling-RFDS.patch b/debian/patches/bugfix/x86/x86-rfds-Mitigate-Register-File-Data-Sampling-RFDS.patch deleted file mode 100644 index 21603126c5..0000000000 --- a/debian/patches/bugfix/x86/x86-rfds-Mitigate-Register-File-Data-Sampling-RFDS.patch +++ /dev/null @@ -1,384 +0,0 @@ -From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Date: Mon, 11 Mar 2024 12:29:43 -0700 -Subject: x86/rfds: Mitigate Register File Data Sampling (RFDS) -Origin: https://git.kernel.org/linus/8076fcde016c9c0e0660543e67bff86cb48a7c9c - -RFDS is a CPU vulnerability that may allow userspace to infer kernel -stale data previously used in floating point registers, vector registers -and integer registers. RFDS only affects certain Intel Atom processors. - -Intel released a microcode update that uses VERW instruction to clear -the affected CPU buffers. Unlike MDS, none of the affected cores support -SMT. - -Add RFDS bug infrastructure and enable the VERW based mitigation by -default, that clears the affected buffers just before exiting to -userspace. Also add sysfs reporting and cmdline parameter -"reg_file_data_sampling" to control the mitigation. - -For details see: -Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst - -Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> ---- - .../ABI/testing/sysfs-devices-system-cpu | 1 + - .../admin-guide/kernel-parameters.txt | 21 +++++ - arch/x86/Kconfig | 11 +++ - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/msr-index.h | 8 ++ - arch/x86/kernel/cpu/bugs.c | 78 ++++++++++++++++++- - arch/x86/kernel/cpu/common.c | 38 ++++++++- - drivers/base/cpu.c | 3 + - include/linux/cpu.h | 2 + - 9 files changed, 157 insertions(+), 6 deletions(-) - -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index a1db6db47505..710d47be11e0 100644 ---- a/Documentation/ABI/testing/sysfs-devices-system-cpu -+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -516,6 +516,7 @@ What: /sys/devices/system/cpu/vulnerabilities - /sys/devices/system/cpu/vulnerabilities/mds - /sys/devices/system/cpu/vulnerabilities/meltdown - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data -+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling - /sys/devices/system/cpu/vulnerabilities/retbleed - /sys/devices/system/cpu/vulnerabilities/spec_store_bypass - /sys/devices/system/cpu/vulnerabilities/spectre_v1 -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 31b3a25680d0..73062d47a462 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1150,6 +1150,26 @@ - The filter can be disabled or changed to another - driver later using sysfs. - -+ reg_file_data_sampling= -+ [X86] Controls mitigation for Register File Data -+ Sampling (RFDS) vulnerability. RFDS is a CPU -+ vulnerability which may allow userspace to infer -+ kernel data values previously stored in floating point -+ registers, vector registers, or integer registers. -+ RFDS only affects Intel Atom processors. -+ -+ on: Turns ON the mitigation. -+ off: Turns OFF the mitigation. -+ -+ This parameter overrides the compile time default set -+ by CONFIG_MITIGATION_RFDS. Mitigation cannot be -+ disabled when other VERW based mitigations (like MDS) -+ are enabled. In order to disable RFDS mitigation all -+ VERW based mitigations need to be disabled. -+ -+ For details see: -+ Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst -+ - driver_async_probe= [KNL] - List of driver names to be probed asynchronously. * - matches with all driver names. If * is specified, the -@@ -3398,6 +3418,7 @@ - nospectre_bhb [ARM64] - nospectre_v1 [X86,PPC] - nospectre_v2 [X86,PPC,S390,ARM64] -+ reg_file_data_sampling=off [X86] - retbleed=off [X86] - spec_store_bypass_disable=off [X86,PPC] - spectre_v2_user=off [X86] -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 5edec175b9bf..637e337c332e 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -2614,6 +2614,17 @@ config GDS_FORCE_MITIGATION - - If in doubt, say N. - -+config MITIGATION_RFDS -+ bool "RFDS Mitigation" -+ depends on CPU_SUP_INTEL -+ default y -+ help -+ Enable mitigation for Register File Data Sampling (RFDS) by default. -+ RFDS is a hardware vulnerability which affects Intel Atom CPUs. It -+ allows unprivileged speculative access to stale data previously -+ stored in floating point, vector and integer registers. -+ See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> -+ - endif - - config ARCH_HAS_ADD_PAGES -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 2b62cdd8dd12..8511aad59581 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -503,4 +503,5 @@ - /* BUG word 2 */ - #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ - #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ - #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index f1bd7b91b3c6..d1b5edaf6c34 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -165,6 +165,14 @@ - * CPU is not vulnerable to Gather - * Data Sampling (GDS). - */ -+#define ARCH_CAP_RFDS_NO BIT(27) /* -+ * Not susceptible to Register -+ * File Data Sampling. -+ */ -+#define ARCH_CAP_RFDS_CLEAR BIT(28) /* -+ * VERW clears CPU Register -+ * File. -+ */ - - #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* - * IA32_XAPIC_DISABLE_STATUS MSR -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index cd6ac89c1a0d..01ac18f56147 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -480,6 +480,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) - } - early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); - -+#undef pr_fmt -+#define pr_fmt(fmt) "Register File Data Sampling: " fmt -+ -+enum rfds_mitigations { -+ RFDS_MITIGATION_OFF, -+ RFDS_MITIGATION_VERW, -+ RFDS_MITIGATION_UCODE_NEEDED, -+}; -+ -+/* Default mitigation for Register File Data Sampling */ -+static enum rfds_mitigations rfds_mitigation __ro_after_init = -+ IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; -+ -+static const char * const rfds_strings[] = { -+ [RFDS_MITIGATION_OFF] = "Vulnerable", -+ [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", -+ [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", -+}; -+ -+static void __init rfds_select_mitigation(void) -+{ -+ if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { -+ rfds_mitigation = RFDS_MITIGATION_OFF; -+ return; -+ } -+ if (rfds_mitigation == RFDS_MITIGATION_OFF) -+ return; -+ -+ if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) -+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); -+ else -+ rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; -+} -+ -+static __init int rfds_parse_cmdline(char *str) -+{ -+ if (!str) -+ return -EINVAL; -+ -+ if (!boot_cpu_has_bug(X86_BUG_RFDS)) -+ return 0; -+ -+ if (!strcmp(str, "off")) -+ rfds_mitigation = RFDS_MITIGATION_OFF; -+ else if (!strcmp(str, "on")) -+ rfds_mitigation = RFDS_MITIGATION_VERW; -+ -+ return 0; -+} -+early_param("reg_file_data_sampling", rfds_parse_cmdline); -+ - #undef pr_fmt - #define pr_fmt(fmt) "" fmt - -@@ -513,6 +564,11 @@ static void __init md_clear_update_mitigation(void) - mmio_mitigation = MMIO_MITIGATION_VERW; - mmio_select_mitigation(); - } -+ if (rfds_mitigation == RFDS_MITIGATION_OFF && -+ boot_cpu_has_bug(X86_BUG_RFDS)) { -+ rfds_mitigation = RFDS_MITIGATION_VERW; -+ rfds_select_mitigation(); -+ } - out: - if (boot_cpu_has_bug(X86_BUG_MDS)) - pr_info("MDS: %s\n", mds_strings[mds_mitigation]); -@@ -522,6 +578,8 @@ static void __init md_clear_update_mitigation(void) - pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); - else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - pr_info("MMIO Stale Data: Unknown: No mitigations\n"); -+ if (boot_cpu_has_bug(X86_BUG_RFDS)) -+ pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); - } - - static void __init md_clear_select_mitigation(void) -@@ -529,11 +587,12 @@ static void __init md_clear_select_mitigation(void) - mds_select_mitigation(); - taa_select_mitigation(); - mmio_select_mitigation(); -+ rfds_select_mitigation(); - - /* -- * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update -- * and print their mitigation after MDS, TAA and MMIO Stale Data -- * mitigation selection is done. -+ * As these mitigations are inter-related and rely on VERW instruction -+ * to clear the microarchitural buffers, update and print their status -+ * after mitigation selection is done for each of these vulnerabilities. - */ - md_clear_update_mitigation(); - } -@@ -2622,6 +2681,11 @@ static ssize_t mmio_stale_data_show_state(char *buf) - sched_smt_active() ? "vulnerable" : "disabled"); - } - -+static ssize_t rfds_show_state(char *buf) -+{ -+ return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); -+} -+ - static char *stibp_state(void) - { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && -@@ -2781,6 +2845,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr - case X86_BUG_GDS: - return gds_show_state(buf); - -+ case X86_BUG_RFDS: -+ return rfds_show_state(buf); -+ - default: - break; - } -@@ -2855,4 +2922,9 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu - { - return cpu_show_common(dev, attr, buf, X86_BUG_GDS); - } -+ -+ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); -+} - #endif -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index fbc4e60d027c..40d8c110bb32 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -1267,6 +1267,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { - #define SRSO BIT(5) - /* CPU is affected by GDS */ - #define GDS BIT(6) -+/* CPU is affected by Register File Data Sampling */ -+#define RFDS BIT(7) - - static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), -@@ -1294,9 +1296,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), -- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), -- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), -- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), -+ VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), -+ VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), - - VULNBL_AMD(0x15, RETBLEED), - VULNBL_AMD(0x16, RETBLEED), -@@ -1330,6 +1341,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); - } - -+static bool __init vulnerable_to_rfds(u64 ia32_cap) -+{ -+ /* The "immunity" bit trumps everything else: */ -+ if (ia32_cap & ARCH_CAP_RFDS_NO) -+ return false; -+ -+ /* -+ * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to -+ * indicate that mitigation is needed because guest is running on a -+ * vulnerable hardware or may migrate to such hardware: -+ */ -+ if (ia32_cap & ARCH_CAP_RFDS_CLEAR) -+ return true; -+ -+ /* Only consult the blacklist when there is no enumeration: */ -+ return cpu_matches(cpu_vuln_blacklist, RFDS); -+} -+ - static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - { - u64 ia32_cap = x86_read_arch_cap_msr(); -@@ -1441,6 +1470,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - boot_cpu_has(X86_FEATURE_AVX)) - setup_force_cpu_bug(X86_BUG_GDS); - -+ if (vulnerable_to_rfds(ia32_cap)) -+ setup_force_cpu_bug(X86_BUG_RFDS); -+ - if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) - return; - -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index 47de0f140ba6..0b33e81f9c9b 100644 ---- a/drivers/base/cpu.c -+++ b/drivers/base/cpu.c -@@ -588,6 +588,7 @@ CPU_SHOW_VULN_FALLBACK(mmio_stale_data); - CPU_SHOW_VULN_FALLBACK(retbleed); - CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); - CPU_SHOW_VULN_FALLBACK(gds); -+CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); - - static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); - static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); -@@ -602,6 +603,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); - static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); - static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); - static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); -+static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); - - static struct attribute *cpu_root_vulnerabilities_attrs[] = { - &dev_attr_meltdown.attr, -@@ -617,6 +619,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { - &dev_attr_retbleed.attr, - &dev_attr_spec_rstack_overflow.attr, - &dev_attr_gather_data_sampling.attr, -+ &dev_attr_reg_file_data_sampling.attr, - NULL - }; - -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index dcb89c987164..8654714421a0 100644 ---- a/include/linux/cpu.h -+++ b/include/linux/cpu.h -@@ -75,6 +75,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, - struct device_attribute *attr, char *buf); - extern ssize_t cpu_show_gds(struct device *dev, - struct device_attribute *attr, char *buf); -+extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, -+ struct device_attribute *attr, char *buf); - - extern __printf(4, 5) - struct device *cpu_device_create(struct device *parent, void *drvdata, --- -2.43.0 - diff --git a/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch index 3d8bdf0664..3a10822b3b 100644 --- a/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch +++ b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch @@ -26,16 +26,16 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org> --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c -@@ -1031,6 +1031,8 @@ void __init setup_arch(char **cmdline_p) +@@ -902,6 +902,8 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); + efi_set_secure_boot(boot_params.secure_boot); + reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); -@@ -1192,8 +1194,6 @@ void __init setup_arch(char **cmdline_p) +@@ -1063,8 +1065,6 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); @@ -67,7 +67,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org> default: --- a/include/linux/security.h +++ b/include/linux/security.h -@@ -482,6 +482,7 @@ int security_inode_notifysecctx(struct i +@@ -486,6 +486,7 @@ int security_inode_notifysecctx(struct i int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); @@ -75,7 +75,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org> #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) -@@ -1388,6 +1389,11 @@ static inline int security_locked_down(e +@@ -1404,6 +1405,11 @@ static inline int security_locked_down(e { return 0; } diff --git a/debian/patches/series b/debian/patches/series index 361758bb88..8c1ff52363 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -68,7 +68,6 @@ bugfix/arm/arm-mm-export-__sync_icache_dcache-for-xen-privcmd.patch bugfix/powerpc/powerpc-boot-fix-missing-crc32poly.h-when-building-with-kernel_xz.patch bugfix/arm64/arm64-acpi-Add-fixup-for-HPE-m400-quirks.patch bugfix/alpha/alpha-fix-missing-symbol-versions-for-str-n-cat-cpy.patch -bugfix/x86/platform-x86-p2sb-On-Goldmont-only-cache-P2SB-and-SP.patch # Arch features features/x86/x86-memtest-WARN-if-bad-RAM-found.patch @@ -95,10 +94,6 @@ features/all/db-mok-keyring/trust-machine-keyring-by-default.patch # Security fixes debian/i386-686-pae-pci-set-pci-nobios-by-default.patch debian/ntfs-mark-it-as-broken.patch -bugfix/x86/x86-mmio-Disable-KVM-mitigation-when-X86_FEATURE_CLE.patch -bugfix/x86/Documentation-hw-vuln-Add-documentation-for-RFDS.patch -bugfix/x86/x86-rfds-Mitigate-Register-File-Data-Sampling-RFDS.patch -bugfix/x86/KVM-x86-Export-RFDS_NO-and-RFDS_CLEAR-to-guests.patch # Fix exported symbol versions bugfix/all/module-disable-matching-missing-version-crc.patch |