diff --git a/SPECS-EXTENDED/kernel-rt/config b/SPECS-EXTENDED/kernel-rt/config
index 772750328b1..6d3e37401e3 100644
--- a/SPECS-EXTENDED/kernel-rt/config
+++ b/SPECS-EXTENDED/kernel-rt/config
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86_64 6.6.85.1 Kernel Configuration
+# Linux/x86_64 6.6.104.2 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.2.0"
 CONFIG_CC_IS_GCC=y
@@ -546,6 +546,8 @@ CONFIG_CPU_SRSO=y
 # CONFIG_GDS_FORCE_MITIGATION is not set
 CONFIG_MITIGATION_RFDS=y
 CONFIG_MITIGATION_SPECTRE_BHI=y
+CONFIG_MITIGATION_ITS=y
+CONFIG_MITIGATION_TSA=y
 CONFIG_ARCH_HAS_ADD_PAGES=y
 #
@@ -1135,7 +1137,6 @@ CONFIG_SKB_EXTENSIONS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
-CONFIG_UNIX_SCM=y
 CONFIG_AF_UNIX_OOB=y
 CONFIG_UNIX_DIAG=m
 # CONFIG_TLS is not set
@@ -1856,8 +1857,6 @@ CONFIG_ETHTOOL_NETLINK=y
 #
 # Device Drivers
 #
-CONFIG_HAVE_EISA=y
-# CONFIG_EISA is not set
 CONFIG_HAVE_PCI=y
 CONFIG_PCI=y
 CONFIG_PCI_DOMAINS=y
@@ -2662,6 +2661,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
 CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MLX5_EN_IPSEC=y
 CONFIG_MLX5_SW_STEERING=y
+# CONFIG_MLX5_HW_STEERING is not set
 # CONFIG_MLX5_SF is not set
 CONFIG_MLXSW_CORE=m
 CONFIG_MLXSW_CORE_HWMON=y
@@ -6081,7 +6081,6 @@ CONFIG_DELL_WMI_DDV=m
 # CONFIG_X86_PLATFORM_DRIVERS_HP is not set
 # CONFIG_WIRELESS_HOTKEY is not set
 # CONFIG_IBM_RTL is not set
-# CONFIG_LENOVO_YMC is not set
 # CONFIG_SENSORS_HDAPS is not set
 # CONFIG_THINKPAD_ACPI is not set
 # CONFIG_THINKPAD_LMI is not set
diff --git a/SPECS-EXTENDED/kernel-rt/kernel-rt.signatures.json b/SPECS-EXTENDED/kernel-rt/kernel-rt.signatures.json
index 90f05c26628..8f27b68d823 100644
--- a/SPECS-EXTENDED/kernel-rt/kernel-rt.signatures.json
+++ b/SPECS-EXTENDED/kernel-rt/kernel-rt.signatures.json
@@ -1,10 +1,10 @@
 {
     "Signatures": {
         "cbl-mariner-ca-20211013.pem": "5ef124b0924cb1047c111a0ecff1ae11e6ad7cac8d1d9b40f98f99334121f0b0",
-        "config": "96d8e41cf334f308715713d6e20656f8e1b8ac1475ceeb209f9e217b01447eed",
+        "config": "77d0c707e0ea2c9bbbf39fda825f19cecb97b1286b8f56770a412a7b2349ed08",
         "cpupower": "d7518767bf2b1110d146a49c7d42e76b803f45eb8bd14d931aa6d0d346fae985",
         "cpupower.service": "b057fe9e5d0e8c36f485818286b80e3eba8ff66ff44797940e99b1fd5361bb98",
         "sha512hmac-openssl.sh": "02ab91329c4be09ee66d759e4d23ac875037c3b56e5a598e32fd1206da06a27f",
-        "kernel-6.6.85.1.tar.gz": "4dab471d68ce07dd31e925788c128ff1c7d9a6d2c7e0a073bd8e6701514cfee6"
+        "kernel-6.6.104.2.tar.gz": "dfc7b852fd9e63ce199c35721d00a5e4741c4e881da8235129f853402d784aa2"
     }
 }
diff --git a/SPECS-EXTENDED/kernel-rt/kernel-rt.spec b/SPECS-EXTENDED/kernel-rt/kernel-rt.spec
index 0808c93e29c..47ac04ea625 100644
--- a/SPECS-EXTENDED/kernel-rt/kernel-rt.spec
+++ b/SPECS-EXTENDED/kernel-rt/kernel-rt.spec
@@ -1,6 +1,6 @@
 %global security_hardening none
 %global sha512hmac bash %{_sourcedir}/sha512hmac-openssl.sh
-%global rt_version rt53
+%global rt_version rt60
 %define uname_r %{version}-%{rt_version}-%{release}
 %define mariner_version 3
 %define version_upstream %(echo %{version} | rev | cut -d'.' -f2- | rev)
@@ -24,8 +24,8 @@
 Summary: Realtime Linux Kernel
 Name: kernel-rt
-Version: 6.6.85.1
-Release: 3%{?dist}
+Version: 6.6.104.2
+Release: 1%{?dist}
 License: GPLv2
 Vendor: Microsoft Corporation
 Distribution: Azure Linux
@@ -426,6 +426,9 @@ ln -sf linux-%{uname_r}.cfg /boot/mariner.cfg
 %{_sysconfdir}/bash_completion.d/bpftool
 
 %changelog
+* Tue Sep 30 2025 CBL-Mariner Servicing Account - 6.6.104.2-1
+- Auto-upgrade to 6.6.104.2
+
 * Fri Aug 22 2025 Siddharth Chintamaneni - 6.6.85.1-3
 - Introducing kernel-hwe
diff --git a/SPECS-EXTENDED/kernel-rt/patch-6.6.85-rt53.patch b/SPECS-EXTENDED/kernel-rt/patch-6.6.104-rt60.patch
similarity index 52%
rename from SPECS-EXTENDED/kernel-rt/patch-6.6.85-rt53.patch
rename to SPECS-EXTENDED/kernel-rt/patch-6.6.104-rt60.patch
index e74fbdccd3a..f3d640ce6eb 100644
--- a/SPECS-EXTENDED/kernel-rt/patch-6.6.85-rt53.patch
+++ b/SPECS-EXTENDED/kernel-rt/patch-6.6.104-rt60.patch
@@ -1,799 +1,1037 @@
-diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
-index 57c0448d0..e5a283851 100644
---- a/arch/arm/Kconfig
-+++ b/arch/arm/Kconfig
-@@ -34,6 +34,7 @@ config ARM
- select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
- select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
-+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
- select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_CMPXCHG_LOCKREF
- select ARCH_USE_MEMTEST
-@@ -73,7 +74,7 @@ config ARM
- select HAS_IOPORT
- select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
- select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
-+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
- select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
- select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
- select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
-@@ -96,7 +97,7 @@ config ARM
- select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
- select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
- select HAVE_EXIT_THREAD
-- select HAVE_FAST_GUP if ARM_LPAE
-+ select HAVE_FAST_GUP if ARM_LPAE && !(PREEMPT_RT && HIGHPTE)
- select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
- select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
-@@ -118,6 +119,7 @@ config ARM
- select HAVE_PERF_EVENTS
- select HAVE_PERF_REGS
- select HAVE_PERF_USER_STACK_DUMP
-+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
- select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
- select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RSEQ
-diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
-index fef62e4a9..622a30243 100644
---- a/arch/arm/mm/fault.c
-+++ b/arch/arm/mm/fault.c
-@@ -404,6 +404,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
- if (addr < TASK_SIZE)
- return do_page_fault(addr, fsr, regs);
-
-+ if (interrupts_enabled(regs))
-+ local_irq_enable();
-+
- if (user_mode(regs))
- goto bad_area;
+From 6d2a1496f391c86abcc4729dd0283165e3ce8992 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 8 Sep 2023 18:22:48 +0200
+Subject: [PATCH 001/213] sched: Constrain locks in sched_submit_work()
+
+Even though sched_submit_work() is run from preemptible context,
+it is discouraged to have it use blocking locks due to the recursion
+potential.
+
+Enforce this.
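A minimal sketch of the enforcement technique used by this patch; the demo_* names are hypothetical, only the lockdep primitives themselves (DEFINE_WAIT_OVERRIDE_MAP, lock_map_acquire_try, lock_map_release) come from the hunk below:

/*
 * Any mutex or rtlock taken between acquire and release of an
 * LD_WAIT_CONFIG override map makes lockdep complain, which is how
 * blocking locks inside sched_submit_work() get caught.
 */
static DEFINE_WAIT_OVERRIDE_MAP(demo_map, LD_WAIT_CONFIG);

static void demo_no_blocking_section(void)
{
	lock_map_acquire_try(&demo_map);
	/* calling mutex_lock() here would now trigger a lockdep splat */
	lock_map_release(&demo_map);
}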
+ +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-2-bigeasy@linutronix.de +--- + kernel/sched/core.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 1b5e4389f788..904a60c13ab8 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6722,11 +6722,18 @@ void __noreturn do_task_dead(void) -@@ -474,6 +477,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, - static int - do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + static inline void sched_submit_work(struct task_struct *tsk) { -+ if (interrupts_enabled(regs)) -+ local_irq_enable(); ++ static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG); + unsigned int task_flags; + + if (task_is_running(tsk)) + return; + ++ /* ++ * Establish LD_WAIT_CONFIG context to ensure none of the code called ++ * will use a blocking primitive -- which would lead to recursion. ++ */ ++ lock_map_acquire_try(&sched_map); + - do_bad_area(addr, fsr, regs); - return 0; + task_flags = tsk->flags; + /* + * If a worker goes to sleep, notify and ask workqueue whether it +@@ -6751,6 +6758,8 @@ static inline void sched_submit_work(struct task_struct *tsk) + * make sure to submit it to avoid deadlocks. + */ + blk_flush_plug(tsk->plug, true); ++ ++ lock_map_release(&sched_map); } -diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c -index 7e8773a2d..9fde36fcb 100644 ---- a/arch/arm/vfp/vfpmodule.c -+++ b/arch/arm/vfp/vfpmodule.c -@@ -55,6 +55,34 @@ extern unsigned int VFP_arch_feroceon __alias(VFP_arch); - */ - union vfp_state *vfp_current_hw_state[NR_CPUS]; -+/* -+ * Claim ownership of the VFP unit. -+ * -+ * The caller may change VFP registers until vfp_unlock() is called. -+ * -+ * local_bh_disable() is used to disable preemption and to disable VFP -+ * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is -+ * not sufficient because it only serializes soft interrupt related sections -+ * via a local lock, but stays preemptible. Disabling preemption is the right -+ * choice here as bottom half processing is always in thread context on RT -+ * kernels so it implicitly prevents bottom half processing as well. -+ */ -+static void vfp_lock(void) + static void sched_update_worker(struct task_struct *tsk) +-- +2.51.0 + +From c4df7971c25aa6aca5d5e55aa498c3b3b22da669 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 8 Sep 2023 18:22:49 +0200 +Subject: [PATCH 002/213] locking/rtmutex: Avoid unconditional slowpath for + DEBUG_RT_MUTEXES + +With DEBUG_RT_MUTEXES enabled the fast-path rt_mutex_cmpxchg_acquire() +always fails and all lock operations take the slow path. + +Provide a new helper inline rt_mutex_try_acquire() which maps to +rt_mutex_cmpxchg_acquire() in the non-debug case. For the debug case +it invokes rt_mutex_slowtrylock() which can acquire a non-contended +rtmutex under full debug coverage. 
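A condensed view of the two build variants described above; the helper names are from the patch, but the single-#ifdef arrangement here is a sketch (the real code keys off the existing split of rt_mutex_cmpxchg_acquire()):

static __always_inline bool sketch_rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
#ifndef CONFIG_DEBUG_RT_MUTEXES
	struct task_struct *old = NULL;

	/* non-debug: plain owner cmpxchg, same cost as before */
	return try_cmpxchg_acquire(&lock->owner, &old, current);
#else
	/* debug: the cmpxchg helper is hardwired to fail, so use the
	 * debug-covered trylock to still grab an uncontended lock */
	return rt_mutex_slowtrylock(lock);
#endif
}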
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-3-bigeasy@linutronix.de +--- + kernel/locking/rtmutex.c | 21 ++++++++++++++++++++- + kernel/locking/ww_rt_mutex.c | 2 +- + 2 files changed, 21 insertions(+), 2 deletions(-) + +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index bf3a28ee7d8f..ad15a7d81e59 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + return try_cmpxchg_acquire(&lock->owner, &old, new); + } + ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_disable(); -+ else -+ preempt_disable(); ++ return rt_mutex_cmpxchg_acquire(lock, NULL, current); +} + -+static void vfp_unlock(void) + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -297,6 +302,20 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + + } + ++static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); ++ ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_enable(); -+ else -+ preempt_enable(); ++ /* ++ * With debug enabled rt_mutex_cmpxchg trylock() will always fail. ++ * ++ * Avoid unconditionally taking the slow path by using ++ * rt_mutex_slow_trylock() which is covered by the debug code and can ++ * acquire a non-contended rtmutex. ++ */ ++ return rt_mutex_slowtrylock(lock); +} + - /* - * Is 'thread's most up to date state stored in this CPUs hardware? - * Must be called from non-preemptible context. -@@ -240,7 +268,7 @@ static void vfp_panic(char *reason, u32 inst) - /* - * Process bitmask of exception conditions. 
- */ --static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) -+static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr) + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -1756,7 +1775,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, + unsigned int state) { - int si_code = 0; +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ if (likely(rt_mutex_try_acquire(lock))) + return 0; -@@ -248,8 +276,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ + return rt_mutex_slowlock(lock, NULL, state); +diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c +index d1473c624105..c7196de838ed 100644 +--- a/kernel/locking/ww_rt_mutex.c ++++ b/kernel/locking/ww_rt_mutex.c +@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, + } + mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); - if (exceptions == VFP_EXCEPTION_ERROR) { - vfp_panic("unhandled bounce", inst); -- vfp_raise_sigfpe(FPE_FLTINV, regs); -- return; -+ return FPE_FLTINV; +- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { ++ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { + if (ww_ctx) + ww_mutex_set_context_fastpath(lock, ww_ctx); + return 0; +-- +2.51.0 + +From 71eb4202ac0cb9dd4f5322ca3a14a19986265cc1 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 8 Sep 2023 18:22:50 +0200 +Subject: [PATCH 003/213] sched: Extract __schedule_loop() + +There are currently two implementations of this basic __schedule() +loop, and there is soon to be a third. 
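For orientation, a sketch (not in the patch) of the callers that end up sharing the loop extracted here once the series is applied:

/*
 *   schedule()          -> __schedule_loop(SM_NONE)
 *   schedule_rtlock()   -> __schedule_loop(SM_RTLOCK_WAIT)   [PREEMPT_RT]
 *   rt_mutex_schedule() -> __schedule_loop(SM_NONE)          [PATCH 004]
 */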
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-4-bigeasy@linutronix.de +--- + kernel/sched/core.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 904a60c13ab8..9060719679dd 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6772,16 +6772,21 @@ static void sched_update_worker(struct task_struct *tsk) } + } - /* -@@ -277,8 +304,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ - RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); - RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); +-asmlinkage __visible void __sched schedule(void) ++static __always_inline void __schedule_loop(unsigned int sched_mode) + { +- struct task_struct *tsk = current; +- +- sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(SM_NONE); ++ __schedule(sched_mode); + sched_preempt_enable_no_resched(); + } while (need_resched()); ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); ++ __schedule_loop(SM_NONE); + sched_update_worker(tsk); + } + EXPORT_SYMBOL(schedule); +@@ -6845,11 +6850,7 @@ void __sched schedule_preempt_disabled(void) + #ifdef CONFIG_PREEMPT_RT + void __sched notrace schedule_rtlock(void) + { +- do { +- preempt_disable(); +- __schedule(SM_RTLOCK_WAIT); +- sched_preempt_enable_no_resched(); +- } while (need_resched()); ++ __schedule_loop(SM_RTLOCK_WAIT); + } + NOKPROBE_SYMBOL(schedule_rtlock); + #endif +-- +2.51.0 + +From f5bc285d2484506550a8b68614dc65fd7a4da4e0 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 8 Sep 2023 18:22:51 +0200 +Subject: [PATCH 004/213] sched: Provide rt_mutex specific scheduler helpers + +With PREEMPT_RT there is a rt_mutex recursion problem where +sched_submit_work() can use an rtlock (aka spinlock_t). More +specifically what happens is: + + mutex_lock() /* really rt_mutex */ + ... + __rt_mutex_slowlock_locked() + task_blocks_on_rt_mutex() + // enqueue current task as waiter + // do PI chain walk + rt_mutex_slowlock_block() + schedule() + sched_submit_work() + ... + spin_lock() /* really rtlock */ + ... + __rt_mutex_slowlock_locked() + task_blocks_on_rt_mutex() + // enqueue current task as waiter *AGAIN* + // *CONFUSION* + +Fix this by making rt_mutex do the sched_submit_work() early, before +it enqueues itself as a waiter -- before it even knows *if* it will +wait. + +[[ basically Thomas' patch but with different naming and a few asserts + added ]] + +Originally-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de +--- + include/linux/sched.h | 3 +++ + include/linux/sched/rt.h | 4 ++++ + kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++---- + 3 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index cb38eee732fd..87f1bf06857e 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -915,6 +915,9 @@ struct task_struct { + * ->sched_remote_wakeup gets used, so it can be in this word. 
+ */ + unsigned sched_remote_wakeup:1; ++#ifdef CONFIG_RT_MUTEXES ++ unsigned sched_rt_mutex:1; ++#endif -- if (si_code) -- vfp_raise_sigfpe(si_code, regs); -+ return si_code; + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; +diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h +index 994c25640e15..b2b9e6eb9683 100644 +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } + #ifdef CONFIG_RT_MUTEXES ++extern void rt_mutex_pre_schedule(void); ++extern void rt_mutex_schedule(void); ++extern void rt_mutex_post_schedule(void); ++ /* -@@ -324,6 +350,8 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) - static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) - { - u32 fpscr, orig_fpscr, fpsid, exceptions; -+ int si_code2 = 0; -+ int si_code = 0; - - pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); + * Must hold either p->pi_lock or task_rq(p)->lock. + */ +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 9060719679dd..1b7afbd306c3 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6725,9 +6725,6 @@ static inline void sched_submit_work(struct task_struct *tsk) + static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG); + unsigned int task_flags; -@@ -369,8 +397,8 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) - * unallocated VFP instruction but with FPSCR.IXE set and not - * on VFP subarch 1. - */ -- vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); +- if (task_is_running(tsk)) - return; -+ si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr); -+ goto exit; - } - +- /* -@@ -394,14 +422,14 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) - */ - exceptions = vfp_emulate_instruction(trigger, fpscr, regs); - if (exceptions) -- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); -+ si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); + * Establish LD_WAIT_CONFIG context to ensure none of the code called + * will use a blocking primitive -- which would lead to recursion. +@@ -6785,7 +6782,12 @@ asmlinkage __visible void __sched schedule(void) + { + struct task_struct *tsk = current; - /* - * If there isn't a second FP instruction, exit now. Note that - * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. 
- */ - if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) -- return; -+ goto exit; - - /* - * The barrier() here prevents fpinst2 being read -@@ -413,7 +441,13 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) - emulate: - exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); - if (exceptions) -- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); -+ si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); -+exit: -+ vfp_unlock(); -+ if (si_code2) -+ vfp_raise_sigfpe(si_code2, regs); -+ if (si_code) -+ vfp_raise_sigfpe(si_code, regs); +- sched_submit_work(tsk); ++#ifdef CONFIG_RT_MUTEXES ++ lockdep_assert(!tsk->sched_rt_mutex); ++#endif ++ ++ if (!task_is_running(tsk)) ++ sched_submit_work(tsk); + __schedule_loop(SM_NONE); + sched_update_worker(tsk); } +@@ -7046,6 +7048,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio) - static void vfp_enable(void *unused) -@@ -512,11 +546,9 @@ static inline void vfp_pm_init(void) { } - */ - void vfp_sync_hwstate(struct thread_info *thread) + #ifdef CONFIG_RT_MUTEXES + ++/* ++ * Would be more useful with typeof()/auto_type but they don't mix with ++ * bit-fields. Since it's a local thing, use int. Keep the generic sounding ++ * name such that if someone were to implement this function we get to compare ++ * notes. ++ */ ++#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; }) ++ ++void rt_mutex_pre_schedule(void) ++{ ++ lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1)); ++ sched_submit_work(current); ++} ++ ++void rt_mutex_schedule(void) ++{ ++ lockdep_assert(current->sched_rt_mutex); ++ __schedule_loop(SM_NONE); ++} ++ ++void rt_mutex_post_schedule(void) ++{ ++ sched_update_worker(current); ++ lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0)); ++} ++ + static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) { -- unsigned int cpu = get_cpu(); -+ vfp_lock(); + if (pi_task) +-- +2.51.0 + +From 6bd52690d92cc002bbee2b6b866a5f84762a513e Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 8 Sep 2023 18:22:52 +0200 +Subject: [PATCH 005/213] locking/rtmutex: Use rt_mutex specific scheduler + helpers + +Have rt_mutex use the rt_mutex specific scheduler helpers to avoid +recursion vs rtlock on the PI state. 
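A usage sketch condensed from the hunks that follow (error paths omitted): every rtmutex-based sleeping-lock slowpath brackets its wait with the new helpers, so sched_submit_work() runs before the task can be enqueued as a waiter.

static int sketch_slowlock(struct rt_mutex_base *lock, unsigned int state)
{
	unsigned long flags;
	int ret;

	rt_mutex_pre_schedule();	/* submit block plug / workqueue work */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, NULL, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	rt_mutex_post_schedule();	/* restore worker bookkeeping */

	return ret;
}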
+ +[[ peterz: adapted to new names ]] + +Reported-by: Crystal Wood +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-6-bigeasy@linutronix.de +--- + kernel/futex/pi.c | 11 +++++++++++ + kernel/locking/rtmutex.c | 14 ++++++++++++-- + kernel/locking/rwbase_rt.c | 6 ++++++ + kernel/locking/rwsem.c | 8 +++++++- + kernel/locking/spinlock_rt.c | 4 ++++ + 5 files changed, 40 insertions(+), 3 deletions(-) + +diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c +index ce2889f12375..f8e65b27d9d6 100644 +--- a/kernel/futex/pi.c ++++ b/kernel/futex/pi.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0-or-later -- local_bh_disable(); -- -- if (vfp_state_in_hw(cpu, thread)) { -+ if (vfp_state_in_hw(raw_smp_processor_id(), thread)) { - u32 fpexc = fmrx(FPEXC); + #include ++#include + #include - /* -@@ -527,8 +559,7 @@ void vfp_sync_hwstate(struct thread_info *thread) - fmxr(FPEXC, fpexc); + #include "futex.h" +@@ -1002,6 +1003,12 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl + goto no_block; } -- local_bh_enable(); -- put_cpu(); -+ vfp_unlock(); - } - - /* Ensure that the thread reloads the hardware VFP state on the next use. */ -@@ -683,7 +714,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) - if (!user_mode(regs)) - return vfp_kmode_exception(regs, trigger); ++ /* ++ * Must be done before we enqueue the waiter, here is unfortunately ++ * under the hb lock, but that *should* work because it does nothing. ++ */ ++ rt_mutex_pre_schedule(); ++ + rt_mutex_init_waiter(&rt_waiter); -- local_bh_disable(); -+ vfp_lock(); - fpexc = fmrx(FPEXC); + /* +@@ -1052,6 +1059,10 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl + if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) + ret = 0; ++ /* ++ * Waiter is unqueued. ++ */ ++ rt_mutex_post_schedule(); + no_block: /* -@@ -748,6 +779,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) - * replay the instruction that trapped. 
- */ - fmxr(FPEXC, fpexc); -+ vfp_unlock(); - } else { - /* Check for synchronous or asynchronous exceptions */ - if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { -@@ -762,17 +794,17 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) - if (!(fpscr & FPSCR_IXE)) { - if (!(fpscr & FPSCR_LENGTH_MASK)) { - pr_debug("not VFP\n"); -- local_bh_enable(); -+ vfp_unlock(); - return -ENOEXEC; - } - fpexc |= FPEXC_DEX; - } - } - bounce: regs->ARM_pc += 4; -+ /* VFP_bounce() will invoke vfp_unlock() */ - VFP_bounce(trigger, fpexc, regs); - } + * Fixup the pi_state owner and possibly acquire the lock if we +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index ad15a7d81e59..f9c66d1415d6 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1632,7 +1632,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, + raw_spin_unlock_irq(&lock->wait_lock); -- local_bh_enable(); - return 0; - } + if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) +- schedule(); ++ rt_mutex_schedule(); -@@ -819,7 +851,7 @@ void kernel_neon_begin(void) - unsigned int cpu; - u32 fpexc; + raw_spin_lock_irq(&lock->wait_lock); + set_current_state(state); +@@ -1662,7 +1662,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, -- local_bh_disable(); -+ vfp_lock(); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); +- schedule(); ++ rt_mutex_schedule(); + } + } + +@@ -1757,6 +1757,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + unsigned long flags; + int ret; ++ /* ++ * Do all pre-schedule work here, before we queue a waiter and invoke ++ * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would ++ * otherwise recurse back into task_blocks_on_rt_mutex() through ++ * rtlock_slowlock() and will then enqueue a second waiter for this ++ * same task and things get really confusing real fast. ++ */ ++ rt_mutex_pre_schedule(); ++ /* - * Kernel mode NEON is only allowed outside of hardirq context with -@@ -850,7 +882,7 @@ void kernel_neon_end(void) - { - /* Disable the NEON/VFP unit. 
*/ - fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); -- local_bh_enable(); -+ vfp_unlock(); + * Technically we could use raw_spin_[un]lock_irq() here, but this can + * be called in early boot if the cmpxchg() fast path is disabled +@@ -1768,6 +1777,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + raw_spin_lock_irqsave(&lock->wait_lock, flags); + ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ rt_mutex_post_schedule(); + + return ret; } - EXPORT_SYMBOL(kernel_neon_end); +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +index 25ec0239477c..c7258cb32d91 100644 +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -71,6 +71,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + struct rt_mutex_base *rtm = &rwb->rtmutex; + int ret; -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 658c6a61a..f5d5c265f 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -97,6 +97,7 @@ config ARM64 - select ARCH_SUPPORTS_NUMA_BALANCING - select ARCH_SUPPORTS_PAGE_TABLE_CHECK - select ARCH_SUPPORTS_PER_VMA_LOCK -+ select ARCH_SUPPORTS_RT - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT - select ARCH_WANT_DEFAULT_BPF_JIT -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 6baa8b856..cca3f7ca6 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -166,6 +166,7 @@ config PPC - select ARCH_STACKWALK - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x -+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select ARCH_USE_MEMTEST -@@ -268,6 +269,7 @@ config PPC - select HAVE_PERF_USER_STACK_DUMP - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select HAVE_RSEQ - select HAVE_SETUP_PER_CPU_AREA if PPC64 - select HAVE_SOFTIRQ_ON_OWN_STACK -diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h -index 283c34647..4727f4005 100644 ---- a/arch/powerpc/include/asm/stackprotector.h -+++ b/arch/powerpc/include/asm/stackprotector.h -@@ -19,8 +19,13 @@ - */ - static __always_inline void boot_init_stack_canary(void) - { -- unsigned long canary = get_random_canary(); -+ unsigned long canary; ++ rwbase_pre_schedule(); + raw_spin_lock_irq(&rtm->wait_lock); -+#ifndef CONFIG_PREEMPT_RT -+ canary = get_random_canary(); -+#else -+ canary = ((unsigned long)&canary) & CANARY_MASK; -+#endif - current->stack_canary = canary; - #ifdef CONFIG_PPC64 - get_paca()->canary = canary; -diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c -index 2de7f6dcd..739f5b179 100644 ---- a/arch/powerpc/kernel/traps.c -+++ b/arch/powerpc/kernel/traps.c -@@ -261,12 +261,17 @@ static char *get_mmu_str(void) + /* +@@ -125,6 +126,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + rwbase_rtmutex_unlock(rtm); - static int __die(const char *str, struct pt_regs *regs, long err) - { -+ const char *pr = ""; -+ - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + trace_contention_end(rwb, ret); ++ rwbase_post_schedule(); + return ret; + } -+ if (IS_ENABLED(CONFIG_PREEMPTION)) -+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? 
" PREEMPT_RT" : " PREEMPT"; +@@ -237,6 +239,8 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, + /* Force readers into slow path */ + atomic_sub(READER_BIAS, &rwb->readers); + ++ rwbase_pre_schedule(); + - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", - IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, get_mmu_str(), -- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", -+ pr, - IS_ENABLED(CONFIG_SMP) ? " SMP" : "", - IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", - debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", -diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig -index 902611954..2f188137f 100644 ---- a/arch/powerpc/kvm/Kconfig -+++ b/arch/powerpc/kvm/Kconfig -@@ -224,6 +224,7 @@ config KVM_E500MC - config KVM_MPIC - bool "KVM in-kernel MPIC emulation" - depends on KVM && PPC_E500 -+ depends on !PREEMPT_RT - select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD - select HAVE_KVM_IRQ_ROUTING -diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig -index 4ebf2ef28..381c3be3b 100644 ---- a/arch/powerpc/platforms/pseries/Kconfig -+++ b/arch/powerpc/platforms/pseries/Kconfig -@@ -2,6 +2,7 @@ - config PPC_PSERIES - depends on PPC64 && PPC_BOOK3S - bool "IBM pSeries & new (POWER5-based) iSeries" -+ select GENERIC_ALLOCATOR - select HAVE_PCSPKR_PLATFORM - select MPIC - select OF_DYNAMIC -diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c -index b1e6d275c..9a8d3970d 100644 ---- a/arch/powerpc/platforms/pseries/iommu.c -+++ b/arch/powerpc/platforms/pseries/iommu.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -206,7 +207,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, - return ret; - } - --static DEFINE_PER_CPU(__be64 *, tce_page); -+struct tce_page { -+ __be64 * page; -+ local_lock_t lock; -+}; -+static DEFINE_PER_CPU(struct tce_page, tce_page) = { -+ .lock = INIT_LOCAL_LOCK(lock), -+}; - - static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - long npages, unsigned long uaddr, -@@ -229,9 +236,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - direction, attrs); - } + raw_spin_lock_irqsave(&rtm->wait_lock, flags); + if (__rwbase_write_trylock(rwb)) + goto out_unlock; +@@ -248,6 +252,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, + if (rwbase_signal_pending_state(state, current)) { + rwbase_restore_current_state(); + __rwbase_write_unlock(rwb, 0, flags); ++ rwbase_post_schedule(); + trace_contention_end(rwb, -EINTR); + return -EINTR; + } +@@ -266,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, -- local_irq_save(flags); /* to protect tcep and the page behind it */ -+ /* to protect tcep and the page behind it */ -+ local_lock_irqsave(&tce_page.lock, flags); + out_unlock: + raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); ++ rwbase_post_schedule(); + return 0; + } -- tcep = __this_cpu_read(tce_page); -+ tcep = __this_cpu_read(tce_page.page); +diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c +index 11ed7ce6579e..62eac9fd809a 100644 +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -1427,8 +1427,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) + #define rwbase_signal_pending_state(state, current) \ + signal_pending_state(state, current) - /* This is safe to do since interrupts are off when we're called - * from iommu_alloc{,_sg}() 
-@@ -240,12 +248,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - tcep = (__be64 *)__get_free_page(GFP_ATOMIC); - /* If allocation fails, fall back to the loop implementation */ - if (!tcep) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&tce_page.lock, flags); - return tce_build_pSeriesLP(tbl->it_index, tcenum, - tceshift, - npages, uaddr, direction, attrs); - } -- __this_cpu_write(tce_page, tcep); -+ __this_cpu_write(tce_page.page, tcep); - } ++#define rwbase_pre_schedule() \ ++ rt_mutex_pre_schedule() ++ + #define rwbase_schedule() \ +- schedule() ++ rt_mutex_schedule() ++ ++#define rwbase_post_schedule() \ ++ rt_mutex_post_schedule() - rpn = __pa(uaddr) >> tceshift; -@@ -275,7 +283,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - tcenum += limit; - } while (npages > 0 && !rc); + #include "rwbase_rt.c" -- local_irq_restore(flags); -+ local_unlock_irqrestore(&tce_page.lock, flags); +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 48a19ed8486d..842037b2ba54 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -184,9 +184,13 @@ static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm) - if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { - ret = (int)rc; -@@ -459,16 +467,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, - DMA_BIDIRECTIONAL, 0); - } + #define rwbase_signal_pending_state(state, current) (0) -- local_irq_disable(); /* to protect tcep and the page behind it */ -- tcep = __this_cpu_read(tce_page); -+ /* to protect tcep and the page behind it */ -+ local_lock_irq(&tce_page.lock); -+ tcep = __this_cpu_read(tce_page.page); ++#define rwbase_pre_schedule() ++ + #define rwbase_schedule() \ + schedule_rtlock() - if (!tcep) { - tcep = (__be64 *)__get_free_page(GFP_ATOMIC); - if (!tcep) { -- local_irq_enable(); -+ local_unlock_irq(&tce_page.lock); - return -ENOMEM; - } -- __this_cpu_write(tce_page, tcep); -+ __this_cpu_write(tce_page.page, tcep); - } ++#define rwbase_post_schedule() ++ + #include "rwbase_rt.c" + /* + * The common functions which get wrapped into the rwlock API. +-- +2.51.0 + +From 1b48b0790b6d9ba2b636495f7fa6ed9d56fa72a9 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 8 Sep 2023 18:22:53 +0200 +Subject: [PATCH 006/213] locking/rtmutex: Add a lockdep assert to catch + potential nested blocking + +There used to be a BUG_ON(current->pi_blocked_on) in the lock acquisition +functions, but that vanished in one of the rtmutex overhauls. + +Bring it back in form of a lockdep assert to catch code paths which take +rtmutex based locks with current::pi_blocked_on != NULL. 
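The failure mode the new lockdep_assert(!current->pi_blocked_on) guards against, condensed from the PATCH 004 description:

/*
 *   rt_mutex_slowlock()
 *     task_blocks_on_rt_mutex()     // pi_blocked_on = first waiter
 *     rt_mutex_slowlock_block()
 *       schedule()
 *         sched_submit_work()
 *           spin_lock()             // rtlock on PREEMPT_RT
 *             rtlock_slowlock()     // would enqueue a second waiter;
 *                                   // the assert now fires instead
 */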
+ +Reported-by: Crystal Wood +Signed-off-by: Thomas Gleixner +Signed-off-by: "Peter Zijlstra (Intel)" +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230908162254.999499-7-bigeasy@linutronix.de +--- + kernel/locking/rtmutex.c | 2 ++ + kernel/locking/rwbase_rt.c | 2 ++ + kernel/locking/spinlock_rt.c | 2 ++ + 3 files changed, 6 insertions(+) + +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index f9c66d1415d6..99129c89120a 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1785,6 +1785,8 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, + unsigned int state) + { ++ lockdep_assert(!current->pi_blocked_on); ++ + if (likely(rt_mutex_try_acquire(lock))) + return 0; - proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; -@@ -511,7 +520,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +index c7258cb32d91..34a59569db6b 100644 +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -133,6 +133,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, + unsigned int state) + { ++ lockdep_assert(!current->pi_blocked_on); ++ + if (rwbase_read_trylock(rwb)) + return 0; - /* error cleanup: caller will clear whole range */ +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 842037b2ba54..38e292454fcc 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -37,6 +37,8 @@ -- local_irq_enable(); -+ local_unlock_irq(&tce_page.lock); - return rc; + static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) + { ++ lockdep_assert(!current->pi_blocked_on); ++ + if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) + rtlock_slowlock(rtm); } - -diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig -index 130499223..391adf8ff 100644 ---- a/arch/riscv/Kconfig -+++ b/arch/riscv/Kconfig -@@ -49,6 +49,7 @@ config RISCV - select ARCH_SUPPORTS_HUGETLBFS if MMU - select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU - select ARCH_SUPPORTS_PER_VMA_LOCK if MMU -+ select ARCH_SUPPORTS_RT - select ARCH_USE_MEMTEST - select ARCH_USE_QUEUED_RWLOCKS - select ARCH_USES_CFI_TRAPS if CFI_CLANG -@@ -136,6 +137,7 @@ config RISCV - select HAVE_PERF_USER_STACK_DUMP - select HAVE_POSIX_CPU_TIMERS_TASK_WORK - select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL -+ select HAVE_PREEMPT_AUTO - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RETHOOK if !XIP_KERNEL - select HAVE_RSEQ -diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h -index 8c72d1bcd..c59ad9b7a 100644 ---- a/arch/riscv/include/asm/thread_info.h -+++ b/arch/riscv/include/asm/thread_info.h -@@ -86,6 +86,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); - * - pending work-to-be-done flags are in lowest half-word - * - other flags in upper half-word(s) +-- +2.51.0 + +From 77070fb54036732535dab3a05bcd2a2232e45315 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 15 Sep 2023 17:19:44 +0200 +Subject: [PATCH 007/213] futex/pi: Fix recursive rt_mutex waiter state + +Some new assertions pointed out that the existing code has nested rt_mutex wait +state in the futex code. 
+ +Specifically, the futex_lock_pi() cancel case uses spin_lock() while there +still is a rt_waiter enqueued for this task, resulting in a state where there +are two waiters for the same task (and task_struct::pi_blocked_on gets +scrambled). + +The reason to take hb->lock at this point is to avoid the wake_futex_pi() +EAGAIN case. + +This happens when futex_top_waiter() and rt_mutex_top_waiter() state becomes +inconsistent. The current rules are such that this inconsistency will not be +observed. + +Notably the case that needs to be avoided is where futex_lock_pi() and +futex_unlock_pi() interleave such that unlock will fail to observe a new +waiter. + +*However* the case at hand is where a waiter is leaving, in this case the race +means a waiter that is going away is not observed -- which is harmless, +provided this race is explicitly handled. + +This is a somewhat dangerous proposition because the converse race is not +observing a new waiter, which must absolutely not happen. But since the race is +valid this cannot be asserted. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Thomas Gleixner +Reviewed-by: Sebastian Andrzej Siewior +Tested-by: Sebastian Andrzej Siewior +Link: https://lkml.kernel.org/r/20230915151943.GD6743@noisy.programming.kicks-ass.net +--- + kernel/futex/pi.c | 76 ++++++++++++++++++++++++++---------------- + kernel/futex/requeue.c | 6 ++-- + 2 files changed, 52 insertions(+), 30 deletions(-) + +diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c +index f8e65b27d9d6..d636a1bbd7d0 100644 +--- a/kernel/futex/pi.c ++++ b/kernel/futex/pi.c +@@ -611,29 +611,16 @@ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, + /* + * Caller must hold a reference on @pi_state. */ -+#define TIF_ARCH_RESCHED_LAZY 0 /* Lazy rescheduling */ - #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -@@ -100,6 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) - #define _TIF_UPROBE (1 << TIF_UPROBE) -+#define _TIF_ARCH_RESCHED_LAZY (1 << TIF_ARCH_RESCHED_LAZY) +-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) ++static int wake_futex_pi(u32 __user *uaddr, u32 uval, ++ struct futex_pi_state *pi_state, ++ struct rt_mutex_waiter *top_waiter) + { +- struct rt_mutex_waiter *top_waiter; + struct task_struct *new_owner; + bool postunlock = false; + DEFINE_RT_WAKE_Q(wqh); + u32 curval, newval; + int ret = 0; - #define _TIF_WORK_MASK \ - (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ -diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c -index 3f0ad09e1..66d2b0455 100644 ---- a/arch/riscv/kernel/cpufeature.c -+++ b/arch/riscv/kernel/cpufeature.c -@@ -8,6 +8,7 @@ +- top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); +- if (WARN_ON_ONCE(!top_waiter)) { +- /* +- * As per the comment in futex_unlock_pi() this should not happen. +- * +- * When this happens, give up our locks and try again, giving +- * the futex_lock_pi() instance time to complete, either by +- * waiting on the rtmutex or removing itself from the futex +- * queue. 
- */
+- ret = -EAGAIN;
+- goto out_unlock;
+- }
+-
+ new_owner = top_waiter->task;
+
+ /*
+@@ -1046,19 +1033,33 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
+ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
+
+ cleanup:
+- spin_lock(q.lock_ptr);
+ /*
+ * If we failed to acquire the lock (deadlock/signal/timeout), we must
+- * first acquire the hb->lock before removing the lock from the
+- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+- * lists consistent.
++ * unwind the above, however we cannot lock hb->lock because
++ * rt_mutex already has a waiter enqueued and hb->lock can itself try
++ * and enqueue an rt_waiter through rtlock.
++ *
++ * Doing the cleanup without holding hb->lock can cause inconsistent
++ * state between hb and pi_state, but only in the direction of not
++ * seeing a waiter that is leaving.
++ *
++ * See futex_unlock_pi(), it deals with this inconsistency.
+ *
+- * In particular; it is important that futex_unlock_pi() can not
+- * observe this inconsistency.
++ * There be dragons here, since we must deal with the inconsistency on
++ * the way out (here), it is impossible to detect/warn about the race
++ * the other way around (missing an incoming waiter).
++ *
++ * What could possibly go wrong...
+ */
+ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
+ ret = 0;
+
++ /*
++ * Now that the rt_waiter has been dequeued, it is safe to use
++ * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
++ * the pi_state.
++ */
++ spin_lock(q.lock_ptr);
+ /*
+ * Waiter is unqueued.
+ */
+@@ -1143,6 +1144,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+ top_waiter = futex_top_waiter(hb, &key);
+ if (top_waiter) {
+ struct futex_pi_state *pi_state = top_waiter->pi_state;
++ struct rt_mutex_waiter *rt_waiter;
+
+ ret = -EINVAL;
+ if (!pi_state)
+@@ -1155,22 +1157,39 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+ if (pi_state->owner != current)
+ goto out_unlock;
+
+- get_pi_state(pi_state);
+ /*
+ * By taking wait_lock while still holding hb->lock, we ensure
+- * there is no point where we hold neither; and therefore
+- * wake_futex_p() must observe a state consistent with what we
+- * observed.
++ * there is no point where we hold neither; and thereby
++ * wake_futex_pi() must observe any new waiters.
++ * ++ * Since the cleanup: case in futex_lock_pi() removes the ++ * rt_waiter without holding hb->lock, it is possible for ++ * wake_futex_pi() to not find a waiter while the above does, ++ * in this case the waiter is on the way out and it can be ++ * ignored. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the +- * rt_waiter. Also see the WARN in wake_futex_pi(). ++ * rt_waiter. + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ ++ /* ++ * Futex vs rt_mutex waiter state -- if there are no rt_mutex ++ * waiters even though futex thinks there are, then the waiter ++ * is leaving and the uncontended path is safe to take. ++ */ ++ rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); ++ if (!rt_waiter) { ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ goto do_uncontended; ++ } ++ ++ get_pi_state(pi_state); + spin_unlock(&hb->lock); - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); -@@ -649,7 +646,7 @@ void check_unaligned_access(int cpu) - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); + /* drops pi_state->pi_mutex.wait_lock */ +- ret = wake_futex_pi(uaddr, uval, pi_state); ++ ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter); -- goto out; -+ return 0; + put_pi_state(pi_state); + +@@ -1198,6 +1217,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) + return ret; } - if (word_cycles < byte_cycles) -@@ -663,18 +660,83 @@ void check_unaligned_access(int cpu) - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); ++do_uncontended: + /* + * We have no kernel internal state, i.e. no waiters in the + * kernel. Waiters which are about to queue themselves are stuck +diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c +index cba8b1a6a4cc..4c73e0b81acc 100644 +--- a/kernel/futex/requeue.c ++++ b/kernel/futex/requeue.c +@@ -850,11 +850,13 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - per_cpu(misaligned_access_speed, cpu) = speed; -+ return 0; -+} +- /* Current is not longer pi_blocked_on */ +- spin_lock(q.lock_ptr); ++ /* ++ * See futex_unlock_pi()'s cleanup: comment. 
++ */ + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; --out: -- __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); -+static void check_unaligned_access_nonboot_cpu(void *param) -+{ -+ unsigned int cpu = smp_processor_id(); -+ struct page **pages = param; -+ -+ if (smp_processor_id() != 0) -+ check_unaligned_access(pages[cpu]); -+} -+ -+static int riscv_online_cpu(unsigned int cpu) -+{ -+ static struct page *buf; -+ -+ /* We are already set since the last check */ -+ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) -+ return 0; -+ -+ buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); -+ if (!buf) { -+ pr_warn("Allocation failure, not measuring misaligned performance\n"); -+ return -ENOMEM; -+ } -+ -+ check_unaligned_access(buf); -+ __free_pages(buf, MISALIGNED_BUFFER_ORDER); -+ return 0; - } ++ spin_lock(q.lock_ptr); + debug_rt_mutex_free_waiter(&rt_waiter); + /* + * Fixup the pi_state owner and possibly acquire the lock if we +-- +2.51.0 + +From 3bf693fef1e7016081fe5a3445cc87a6e04759c6 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 3 Aug 2023 12:09:31 +0200 +Subject: [PATCH 008/213] signal: Add proper comment about the preempt-disable + in ptrace_stop(). + +Commit 53da1d9456fe7 ("fix ptrace slowness") added a preempt-disable section +between read_unlock() and the following schedule() invocation without +explaining why it is needed. + +Replace the comment with an explanation why this is needed. Clarify that +it is needed for correctness but for performance reasons. + +Acked-by: Oleg Nesterov +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/r/20230803100932.325870-2-bigeasy@linutronix.de +--- + kernel/signal.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/kernel/signal.c b/kernel/signal.c +index 49c8c24b444d..f14b6be19e79 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -2331,10 +2331,21 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, + do_notify_parent_cldstop(current, false, why); --static int check_unaligned_access_boot_cpu(void) -+/* Measure unaligned access on all CPUs present at boot in parallel. */ -+static int check_unaligned_access_all_cpus(void) - { -- check_unaligned_access(0); -+ unsigned int cpu; -+ unsigned int cpu_count = num_possible_cpus(); -+ struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), -+ GFP_KERNEL); -+ -+ if (!bufs) { -+ pr_warn("Allocation failure, not measuring misaligned performance\n"); -+ return 0; -+ } -+ -+ /* -+ * Allocate separate buffers for each CPU so there's no fighting over -+ * cache lines. -+ */ -+ for_each_cpu(cpu, cpu_online_mask) { -+ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); -+ if (!bufs[cpu]) { -+ pr_warn("Allocation failure, not measuring misaligned performance\n"); -+ goto out; -+ } -+ } -+ -+ /* Check everybody except 0, who stays behind to tend jiffies. */ -+ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); -+ -+ /* Check core 0. */ -+ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); -+ -+ /* Setup hotplug callback for any new CPUs that come online. 
*/ -+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", -+ riscv_online_cpu, NULL); -+ -+out: -+ for_each_cpu(cpu, cpu_online_mask) { -+ if (bufs[cpu]) -+ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); -+ } -+ -+ kfree(bufs); - return 0; - } - --arch_initcall(check_unaligned_access_boot_cpu); -+arch_initcall(check_unaligned_access_all_cpus); - - void riscv_user_isa_enable(void) - { -diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c -index d1b0a6fc3..d162bf339 100644 ---- a/arch/riscv/kernel/smpboot.c -+++ b/arch/riscv/kernel/smpboot.c -@@ -248,7 +248,6 @@ asmlinkage __visible void smp_callin(void) - - numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); -- check_unaligned_access(curr_cpuid); - - if (has_vector()) { - if (riscv_v_setup_vsize()) -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index a06fab501..4e378ab31 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -118,6 +118,7 @@ config X86 - select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG - select ARCH_SUPPORTS_LTO_CLANG - select ARCH_SUPPORTS_LTO_CLANG_THIN -+ select ARCH_SUPPORTS_RT - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_MEMTEST - select ARCH_USE_QUEUED_RWLOCKS -@@ -272,6 +273,7 @@ config X86 - select HAVE_STATIC_CALL - select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL - select HAVE_PREEMPT_DYNAMIC_CALL -+ select HAVE_PREEMPT_AUTO - select HAVE_RSEQ - select HAVE_RUST if X86_64 - select HAVE_SYSCALL_TRACEPOINTS -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index d63b02940..1ff38ebbd 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -81,8 +81,9 @@ struct thread_info { - #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ --#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ --#define TIF_SSBD 5 /* Speculative store bypass disable */ -+#define TIF_ARCH_RESCHED_LAZY 4 /* Lazy rescheduling */ -+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/ -+#define TIF_SSBD 6 /* Speculative store bypass disable */ - #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ - #define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ - #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ -@@ -104,6 +105,7 @@ struct thread_info { - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -+#define _TIF_ARCH_RESCHED_LAZY (1 << TIF_ARCH_RESCHED_LAZY) - #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) - #define _TIF_SSBD (1 << TIF_SSBD) - #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c -index 831fa4a12..5af3ebec0 100644 ---- a/drivers/acpi/processor_idle.c -+++ b/drivers/acpi/processor_idle.c -@@ -107,7 +107,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = { - */ - static void __cpuidle acpi_safe_halt(void) - { -- if (!tif_need_resched()) { -+ if (!need_resched()) { - raw_safe_halt(); - raw_local_irq_disable(); - } -diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c -index 44cf0e51d..adb645de0 100644 ---- a/drivers/block/zram/zram_drv.c -+++ b/drivers/block/zram/zram_drv.c -@@ -57,6 +57,41 @@ static void zram_free_page(struct zram *zram, size_t index); - static int zram_read_page(struct zram *zram, struct page 
*page, u32 index, - struct bio *parent); - -+#ifdef CONFIG_PREEMPT_RT -+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) -+{ -+ size_t index; -+ -+ for (index = 0; index < num_pages; index++) -+ spin_lock_init(&zram->table[index].lock); -+} -+ -+static int zram_slot_trylock(struct zram *zram, u32 index) -+{ -+ int ret; -+ -+ ret = spin_trylock(&zram->table[index].lock); -+ if (ret) -+ __set_bit(ZRAM_LOCK, &zram->table[index].flags); -+ return ret; -+} -+ -+static void zram_slot_lock(struct zram *zram, u32 index) -+{ -+ spin_lock(&zram->table[index].lock); -+ __set_bit(ZRAM_LOCK, &zram->table[index].flags); -+} -+ -+static void zram_slot_unlock(struct zram *zram, u32 index) -+{ -+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags); -+ spin_unlock(&zram->table[index].lock); -+} -+ -+#else -+ -+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } -+ - static int zram_slot_trylock(struct zram *zram, u32 index) - { - return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); -@@ -71,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) - { - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); - } -+#endif - - static inline bool init_done(struct zram *zram) - { -@@ -1268,6 +1304,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) - - if (!huge_class_size) - huge_class_size = zs_huge_class_size(zram->mem_pool); -+ zram_meta_init_table_locks(zram, num_pages); - return true; - } - -diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h -index 35e322144..4e0c7c6d4 100644 ---- a/drivers/block/zram/zram_drv.h -+++ b/drivers/block/zram/zram_drv.h -@@ -69,6 +69,9 @@ struct zram_table_entry { - unsigned long element; - }; - unsigned long flags; -+#ifdef CONFIG_PREEMPT_RT -+ spinlock_t lock; -+#endif - #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME - ktime_t ac_time; - #endif -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c -index 172aa10a8..4ae472053 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c -@@ -60,11 +60,9 @@ static DEFINE_PER_CPU(int, fpu_recursion_depth); - */ - inline void dc_assert_fp_enabled(void) + /* +- * Don't want to allow preemption here, because +- * sys_ptrace() needs this task to be inactive. ++ * The previous do_notify_parent_cldstop() invocation woke ptracer. ++ * One a PREEMPTION kernel this can result in preemption requirement ++ * which will be fulfilled after read_unlock() and the ptracer will be ++ * put on the CPU. ++ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for ++ * this task wait in schedule(). If this task gets preempted then it ++ * remains enqueued on the runqueue. The ptracer will observe this and ++ * then sleep for a delay of one HZ tick. In the meantime this task ++ * gets scheduled, enters schedule() and will wait for the ptracer. + * +- * XXX: implement read_unlock_no_resched(). ++ * This preemption point is not bad from correctness point of view but ++ * extends the runtime by one HZ tick time due to the ptracer's sleep. ++ * The preempt-disable section ensures that there will be no preemption ++ * between unlock and schedule() and so improving the performance since ++ * the ptracer has no reason to sleep. 
+ */
+ preempt_disable();
+ read_unlock(&tasklist_lock);
+--
+2.51.0
+
+From 35fe7d976b056f58338bdd27c8ee4ea9318c99c5 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Thu, 3 Aug 2023 12:09:32 +0200
+Subject: [PATCH 009/213] signal: Don't disable preemption in ptrace_stop() on
+ PREEMPT_RT.
+
+On PREEMPT_RT keeping preemption disabled during the invocation of
+cgroup_enter_frozen() is a problem because the function acquires css_set_lock
+which is a sleeping lock on PREEMPT_RT and must not be acquired with disabled
+preemption.
+The preempt-disabled section is only for performance optimisation
+reasons and can be avoided.
+
+Extend the comment and don't disable preemption before scheduling on
+PREEMPT_RT.
+
+Acked-by: Oleg Nesterov
+Signed-off-by: Sebastian Andrzej Siewior
+Link: https://lore.kernel.org/r/20230803100932.325870-3-bigeasy@linutronix.de
+---
+ kernel/signal.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/signal.c b/kernel/signal.c
+index f14b6be19e79..107953e8a90c 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -2346,11 +2346,20 @@ static int ptrace_stop(int exit_code, int why, unsigned long message,
+ * The preempt-disable section ensures that there will be no preemption
+ * between unlock and schedule() and so improving the performance since
+ * the ptracer has no reason to sleep.
++ *
++ * On PREEMPT_RT locking tasklist_lock does not disable preemption.
++ * Therefore the task can be preempted (after
++ * do_notify_parent_cldstop()) before unlocking tasklist_lock so there
++ * is no benefit in doing this. The optimisation is harmful on
++ * PREEMPT_RT because the spinlock_t (in cgroup_enter_frozen()) must not
++ * be acquired with disabled preemption.
+ */
+- preempt_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_disable();
+ read_unlock(&tasklist_lock);
+ cgroup_enter_frozen();
+- preempt_enable_no_resched();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_enable_no_resched();
+ schedule();
+ cgroup_leave_frozen(true);
+
+--
+2.51.0
+
+From 5212296a3021a05bcce471fe9d942cd8524018a0 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Thu, 21 Sep 2023 16:15:12 +0200
+Subject: [PATCH 010/213] drm/amd/display: Remove migrate_en/dis from
+ dc_fpu_begin().
+
+This is a revert of the commit mentioned below. While that commit is not
+wrong, as in the kernel will not explode, having migrate_disable() here
+is a complete waste of resources.
+
+Additionally, the commit message is plain wrong and the review tag does
+not make it any better. The migrate_disable() interface has a fat comment
+describing it and it includes the word "undesired" in the headline which
+should tickle people to read it before using it.
+Initially I assumed it was worded too harshly, but now I beg to differ.
+
+The reviewer of the original commit, even without understanding what
+migrate_disable() does, should have asked the following:
+
+- migrate_disable() is added only to the CONFIG_X86 block and it claims
+ to protect fpu_recursion_depth. Why are the other architectures
+ excluded?
+
+- migrate_disable() is added after fpu_recursion_depth was modified.
+ Shouldn't it be added before the modification or referencing takes
+ place?
+
+Moving on.
+Disabling preemption DOES prevent CPU migration. A task that cannot be
+pushed away from the CPU by the scheduler (due to disabled preemption)
+cannot be pushed or migrated to another CPU.
+
+Disabling migration DOES NOT ensure consistency of per-CPU variables.
It
+only ensures that the task acts always on the same per-CPU variable. The
+task remains preemptible, meaning multiple tasks can access the same
+per-CPU variable. This in turn leads to inconsistency for the statement
+
+ *pcpu += 1;
+
+with two tasks on one CPU and a preemption point during the RMW
+operation:
+
+ Task A                 Task B
+ read pcpu to reg  # 0
+ inc reg           # 0 -> 1
+                        read pcpu to reg  # 0
+                        inc reg           # 0 -> 1
+ write reg to pcpu # 1
+                        write reg to pcpu # 1
+
+At the end pcpu reads 1 but should read 2 instead. Boom.
+
+get_cpu_ptr() already contains a preempt_disable() statement. That means
+that the per-CPU variable can only be referenced by a single task which
+is currently running. The only inconsistency that can occur is if the
+variable is additionally accessed from an interrupt.
+
+Remove migrate_disable/enable() from dc_fpu_begin/end().
+
+Cc: Tianci Yin
+Cc: Aurabindo Pillai
+Fixes: 0c316556d1249 ("drm/amd/display: Disable migration to ensure consistency of per-CPU variable")
+Link: https://lore.kernel.org/r/20230921141516.520471-2-bigeasy@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+index 172aa10a8800..86f4c0e04654 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+@@ -91,7 +91,6 @@ void dc_fpu_begin(const char *function_name, const int line)
+
+ if (*pcpu == 1) {
+ #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+- migrate_disable();
+ kernel_fpu_begin();
+ #elif defined(CONFIG_PPC64)
+ if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
+@@ -132,7 +131,6 @@ void dc_fpu_end(const char *function_name, const int line)
+ if (*pcpu <= 0) {
+ #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+ kernel_fpu_end();
+- migrate_enable();
+ #elif defined(CONFIG_PPC64)
+ if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
+ disable_kernel_vsx();
+--
+2.51.0
+
+From 83a2dd3dd2e6b003cf2456ad912223164b053fb8 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Thu, 21 Sep 2023 16:15:13 +0200
+Subject: [PATCH 011/213] drm/amd/display: Simplify the per-CPU usage.
+
+The fpu_recursion_depth counter is used to ensure that dc_fpu_begin()
+can be invoked multiple times while the FPU-disable function itself is
+only invoked once. Also the counterpart (dc_fpu_end()) is balanced
+properly.
+
+Instead of using the get_cpu_ptr() dance around the increment it is
+simpler to increment the per-CPU variable directly. Also the per-CPU
+variable has to be incremented and decremented on the same CPU. This is
+ensured by the inner part which disables preemption. This is not
+obvious, it merely works, and the preempt counter is touched a few
+times for no reason.
+
+Disable preemption before incrementing fpu_recursion_depth for the first
+time. Keep preemption disabled until dc_fpu_end() where the counter is
+decremented, making it obvious that the preemption has to stay disabled
+while the counter is non-zero.
+Use simple inc/dec functions.
+Remove the nested preempt_disable/enable functions which are now not
+needed.
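Condensed to its core, the begin/end pairing this message describes looks roughly like the sketch below. This is an editorial illustration only, not the patched kernel source: the function names are shortened and the tracing and non-x86 branches are dropped.

static DEFINE_PER_CPU(int, fpu_recursion_depth);

void fp_begin(void)
{
	int depth;

	preempt_disable();                              /* pin the task to this CPU */
	depth = __this_cpu_inc_return(fpu_recursion_depth);
	if (depth == 1)                                 /* outermost caller only */
		kernel_fpu_begin();
}

void fp_end(void)
{
	int depth;

	depth = __this_cpu_dec_return(fpu_recursion_depth);
	if (depth == 0)                                 /* last one out */
		kernel_fpu_end();
	preempt_enable();                               /* matches fp_begin() */
}

Because preemption stays disabled while the counter is non-zero, the increment and the matching decrement are guaranteed to hit the same per-CPU variable.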
+ +Link: https://lore.kernel.org/r/20230921141516.520471-3-bigeasy@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 50 ++++++++----------- + 1 file changed, 20 insertions(+), 30 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +index 86f4c0e04654..8bd5926b47e0 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +@@ -60,11 +60,9 @@ static DEFINE_PER_CPU(int, fpu_recursion_depth); + */ + inline void dc_assert_fp_enabled(void) { - int *pcpu, depth = 0; + int depth; @@ -805,7 +1043,7 @@ index 172aa10a8..4ae472053 100644 ASSERT(depth >= 1); } -@@ -84,33 +82,28 @@ inline void dc_assert_fp_enabled(void) +@@ -84,32 +82,27 @@ inline void dc_assert_fp_enabled(void) */ void dc_fpu_begin(const char *function_name, const int line) { @@ -814,14 +1052,12 @@ index 172aa10a8..4ae472053 100644 - pcpu = get_cpu_ptr(&fpu_recursion_depth); - *pcpu += 1; -+ WARN_ON_ONCE(!in_task()); + preempt_disable(); + depth = __this_cpu_inc_return(fpu_recursion_depth); - if (*pcpu == 1) { + if (depth == 1) { #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) -- migrate_disable(); kernel_fpu_begin(); #elif defined(CONFIG_PPC64) - if (cpu_has_feature(CPU_FTR_VSX_COMP)) { @@ -848,7 +1084,7 @@ index 172aa10a8..4ae472053 100644 } /** -@@ -125,30 +118,26 @@ void dc_fpu_begin(const char *function_name, const int line) +@@ -124,29 +117,26 @@ void dc_fpu_begin(const char *function_name, const int line) */ void dc_fpu_end(const char *function_name, const int line) { @@ -862,7 +1098,6 @@ index 172aa10a8..4ae472053 100644 + if (depth == 0) { #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) kernel_fpu_end(); -- migrate_enable(); #elif defined(CONFIG_PPC64) - if (cpu_has_feature(CPU_FTR_VSX_COMP)) { + if (cpu_has_feature(CPU_FTR_VSX_COMP)) @@ -889,31 +1124,64 @@ index 172aa10a8..4ae472053 100644 + TRACE_DCN_FPU(false, function_name, line, depth); + preempt_enable(); } -diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -index 294609557..56c42867b 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c -@@ -2142,9 +2142,17 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) +-- +2.51.0 + +From 675e8fbd3c925af41559e6888800d6624209e3c7 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2023 16:15:14 +0200 +Subject: [PATCH 012/213] drm/amd/display: Add a warning if the FPU is used + outside from task context. + +Add a warning if the FPU is used from any context other than task +context. This is only precaution since the code is not able to be used +from softirq while the API allows it on x86 for instance. 
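As a sketch of the guard this patch adds (names shortened as in the illustration above, body elided to the relevant lines): in_task() is false in hardirq, softirq and NMI context, so any caller from those contexts now warns once.

void fp_begin(void)
{
	WARN_ON_ONCE(!in_task());       /* FPU sections must run in task context */
	preempt_disable();
	__this_cpu_inc(fpu_recursion_depth);
}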
+ +Link: https://lore.kernel.org/r/20230921141516.520471-4-bigeasy@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +index 8bd5926b47e0..4ae4720535a5 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +@@ -84,6 +84,7 @@ void dc_fpu_begin(const char *function_name, const int line) { - bool voltage_supported; -+ display_e2e_pipe_params_st *pipes; -+ -+ pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); -+ if (!pipes) -+ return false; -+ - DC_FP_START(); -- voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate); -+ voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); - DC_FP_END(); -+ -+ kfree(pipes); - return voltage_supported; - } + int depth; ++ WARN_ON_ONCE(!in_task()); + preempt_disable(); + depth = __this_cpu_inc_return(fpu_recursion_depth); + +-- +2.51.0 + +From 5d99cd803806f454a0bd6439781e316c58f8d72e Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2023 16:15:15 +0200 +Subject: [PATCH 013/213] drm/amd/display: Move the memory allocation out of + dcn21_validate_bandwidth_fp(). + +dcn21_validate_bandwidth_fp() is invoked while FPU access has been +enabled. FPU access requires disabling preemption even on PREEMPT_RT. +It is not possible to allocate memory with disabled preemption even with +GFP_ATOMIC on PREEMPT_RT. + +Move the memory allocation before FPU access is enabled. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217928 +Link: https://lore.kernel.org/r/20230921141516.520471-5-bigeasy@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 10 +++++++++- + drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 7 ++----- + drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 5 ++--- + 3 files changed, 13 insertions(+), 9 deletions(-) + diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c -index 24105a5b9..349c22dac 100644 +index 24105a5b9f2a..349c22daccf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -954,9 +954,17 @@ static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, @@ -936,19 +1204,22 @@ index 24105a5b9..349c22dac 100644 } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c -index 8a5a038fd..68970d6cf 100644 +index 8a5a038fd855..89d4e969cfd8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c -@@ -2018,7 +2018,7 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st +@@ -2311,9 +2311,8 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context, + &context->bw_ctx.dml, pipes, pipe_cnt); } - static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, -- bool fast_validate) -+ bool fast_validate, display_e2e_pipe_params_st *pipes) +-bool dcn21_validate_bandwidth_fp(struct dc *dc, +- struct dc_state *context, +- bool fast_validate) ++bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, ++ bool fast_validate, 
display_e2e_pipe_params_st *pipes) { bool out = false; -@@ -2027,7 +2027,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co +@@ -2322,7 +2321,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, int vlevel = 0; int pipe_split_from[MAX_PIPES]; int pipe_cnt = 0; @@ -956,7 +1227,7 @@ index 8a5a038fd..68970d6cf 100644 DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); -@@ -2062,16 +2061,14 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co +@@ -2362,7 +2360,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, out = false; validate_out: @@ -964,54 +1235,86 @@ index 8a5a038fd..68970d6cf 100644 BW_VAL_TRACE_FINISH(); - return out; - } - --bool dcn20_validate_bandwidth_fp(struct dc *dc, +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +index c51badf7b68a..a81a0b9e6884 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +@@ -77,9 +77,8 @@ int dcn21_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); +-bool dcn21_validate_bandwidth_fp(struct dc *dc, - struct dc_state *context, -- bool fast_validate) -+bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, -+ bool fast_validate, display_e2e_pipe_params_st *pipes) - { - bool voltage_supported = false; - bool full_pstate_supported = false; -@@ -2090,11 +2087,11 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, - ASSERT(context != dc->current_state); - - if (fast_validate) { -- return dcn20_validate_bandwidth_internal(dc, context, true); -+ return dcn20_validate_bandwidth_internal(dc, context, true, pipes); - } +- bool fast_validate); ++bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, bool ++ fast_validate, display_e2e_pipe_params_st *pipes); + void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); - // Best case, we support full UCLK switch latency -- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); -+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); - full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; - - if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || -@@ -2106,7 +2103,8 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, - // Fallback: Try to only support G6 temperature read latency - context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; - -- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); -+ memset(pipes, 0, dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st)); -+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); - dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); +-- +2.51.0 + +From 5294da6d360ec647fb099b73ba2262979fa203e9 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2023 16:15:16 +0200 +Subject: [PATCH 014/213] drm/amd/display: Move the memory allocation out of + dcn20_validate_bandwidth_fp(). + +dcn20_validate_bandwidth_fp() is invoked while FPU access has been +enabled. FPU access requires disabling preemption even on PREEMPT_RT. +It is not possible to allocate memory with disabled preemption even with +GFP_ATOMIC on PREEMPT_RT. 
+ +Move the memory allocation before FPU access is enabled. +To preserve previous "clean" state of "pipes" add a memset() before the +second invocation of dcn20_validate_bandwidth_internal() where the +variable is used. + +Link: https://lore.kernel.org/r/20230921141516.520471-6-bigeasy@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + .../drm/amd/display/dc/dcn20/dcn20_resource.c | 10 +++++++++- + .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 16 +++++++--------- + .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 5 ++--- + 3 files changed, 18 insertions(+), 13 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +index 294609557b73..56c42867b20f 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +@@ -2142,9 +2142,17 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) + { + bool voltage_supported; ++ display_e2e_pipe_params_st *pipes; ++ ++ pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); ++ if (!pipes) ++ return false; ++ + DC_FP_START(); +- voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate); ++ voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); + DC_FP_END(); ++ ++ kfree(pipes); + return voltage_supported; + } - if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { -@@ -2311,9 +2309,8 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context, - &context->bw_ctx.dml, pipes, pipe_cnt); +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +index 89d4e969cfd8..68970d6cf031 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +@@ -2018,7 +2018,7 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st } --bool dcn21_validate_bandwidth_fp(struct dc *dc, -- struct dc_state *context, -- bool fast_validate) -+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, -+ bool fast_validate, display_e2e_pipe_params_st *pipes) + static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, +- bool fast_validate) ++ bool fast_validate, display_e2e_pipe_params_st *pipes) { bool out = false; -@@ -2322,7 +2319,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, +@@ -2027,7 +2027,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co int vlevel = 0; int pipe_split_from[MAX_PIPES]; int pipe_cnt = 0; @@ -1019,7 +1322,7 @@ index 8a5a038fd..68970d6cf 100644 DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); -@@ -2362,7 +2358,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, +@@ -2062,16 +2061,14 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co out = false; validate_out: @@ -1027,8 +1330,43 @@ index 8a5a038fd..68970d6cf 100644 BW_VAL_TRACE_FINISH(); + return out; + } + +-bool dcn20_validate_bandwidth_fp(struct dc *dc, +- struct dc_state *context, +- bool fast_validate) ++bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, ++ bool fast_validate, display_e2e_pipe_params_st *pipes) + { + bool voltage_supported = false; + bool full_pstate_supported = false; +@@ -2090,11 +2087,11 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, + ASSERT(context != 
dc->current_state); + + if (fast_validate) { +- return dcn20_validate_bandwidth_internal(dc, context, true); ++ return dcn20_validate_bandwidth_internal(dc, context, true, pipes); + } + + // Best case, we support full UCLK switch latency +- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); ++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); + full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || +@@ -2106,7 +2103,8 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, + // Fallback: Try to only support G6 temperature read latency + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + +- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); ++ memset(pipes, 0, dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st)); ++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); + dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h -index c51badf7b..b6c34198d 100644 +index a81a0b9e6884..b6c34198ddc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -61,9 +61,8 @@ void dcn20_update_bounding_box(struct dc *dc, @@ -1043,360 +1381,1357 @@ index c51badf7b..b6c34198d 100644 void dcn20_fpu_set_wm_ranges(int i, struct pp_smu_wm_range_sets *ranges, struct _vcs_dpi_soc_bounding_box_st *loaded_bb); -@@ -77,9 +76,8 @@ int dcn21_populate_dml_pipes_from_context(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate); --bool dcn21_validate_bandwidth_fp(struct dc *dc, -- struct dc_state *context, -- bool fast_validate); -+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, bool -+ fast_validate, display_e2e_pipe_params_st *pipes); - void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); - - void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); -diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig -index ce397a879..98c3f5328 100644 ---- a/drivers/gpu/drm/i915/Kconfig -+++ b/drivers/gpu/drm/i915/Kconfig -@@ -3,7 +3,6 @@ config DRM_I915 - tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" - depends on DRM - depends on X86 && PCI -- depends on !PREEMPT_RT - select INTEL_GTT if X86 - select INTERVAL_TREE - # we need shmfs for the swappable backing store, and in particular -diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c -index cfbfbfed3..da2becfbc 100644 ---- a/drivers/gpu/drm/i915/display/intel_crtc.c -+++ b/drivers/gpu/drm/i915/display/intel_crtc.c -@@ -562,7 +562,8 @@ void intel_pipe_update_start(struct intel_atomic_state *state, - */ - intel_psr_wait_for_idle_locked(new_crtc_state); - -- local_irq_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_disable(); - - crtc->debug.min_vbl = min; - crtc->debug.max_vbl = max; -@@ -587,11 +588,13 @@ void intel_pipe_update_start(struct intel_atomic_state *state, - break; - } +-- +2.51.0 + +From ba68079dda8cdae6cfba82d53490f9d80debcf16 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 15 Aug 2022 17:29:50 +0200 
+Subject: [PATCH 015/213] net: Avoid the IPI to free the
+
+skb_attempt_defer_free() collects skbs, which were allocated on a
+remote CPU, on a per-CPU list. These skbs are either freed on that
+remote CPU once the CPU enters NET_RX or a remote IPI function is
+invoked to raise the NET_RX softirq if a threshold of pending skbs has
+been exceeded.
+This remote IPI can cause the wakeup of ksoftirqd on PREEMPT_RT if the
+remote CPU was idle. This is undesired because once ksoftirqd is
+running it will acquire all pending softirqs and they will not be
+executed as part of the threaded interrupt until ksoftirqd goes idle
+again.
+
+To avoid all this, schedule the deferred cleanup from a worker.
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/netdevice.h | 4 ++++
+ net/core/dev.c | 39 ++++++++++++++++++++++++++++++---------
+ net/core/skbuff.c | 7 ++++++-
+ 3 files changed, 40 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 030d9de2ba2d..1f5ae8f5a3db 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3267,7 +3267,11 @@ struct softnet_data {
+ int defer_count;
+ int defer_ipi_scheduled;
+ struct sk_buff *defer_list;
++#ifndef CONFIG_PREEMPT_RT
+ call_single_data_t defer_csd;
++#else
++ struct work_struct defer_work;
++#endif
+ };
+
+ static inline void input_queue_head_incr(struct softnet_data *sd)
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 2d3e0e4130c2..61b4294b3441 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4758,15 +4758,6 @@ static void rps_trigger_softirq(void *data)
+
+ #endif /* CONFIG_RPS */
+
+-/* Called from hardirq (IPI) context */
+-static void trigger_rx_softirq(void *data)
+-{
+- struct softnet_data *sd = data;
+-
+- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+- smp_store_release(&sd->defer_ipi_scheduled, 0);
+-}
+-
+ /*
+ * After we queued a packet into sd->input_pkt_queue,
+ * we need to make sure this queue is serviced soon.
+@@ -6735,6 +6726,32 @@ static void skb_defer_free_flush(struct softnet_data *sd) } + } - finish_wait(wq, &wait); -@@ -624,7 +627,8 @@ void intel_pipe_update_start(struct intel_atomic_state *state, - return; ++#ifndef CONFIG_PREEMPT_RT ++ ++/* Called from hardirq (IPI) context */ ++static void trigger_rx_softirq(void *data) ++{ ++ struct softnet_data *sd = data; ++ ++ __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++} ++ ++#else ++ ++static void trigger_rx_softirq(struct work_struct *defer_work) ++{ ++ struct softnet_data *sd; ++ ++ sd = container_of(defer_work, struct softnet_data, defer_work); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++ local_bh_disable(); ++ skb_defer_free_flush(sd); ++ local_bh_enable(); ++} ++ ++#endif ++ + static int napi_threaded_poll(void *data) + { + struct napi_struct *napi = data; +@@ -11676,7 +11693,11 @@ static int __init net_dev_init(void) + INIT_CSD(&sd->csd, rps_trigger_softirq, sd); + sd->cpu = i; + #endif ++#ifndef CONFIG_PREEMPT_RT + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); ++#else ++ INIT_WORK(&sd->defer_work, trigger_rx_softirq); ++#endif + spin_lock_init(&sd->defer_lock); - irq_disable: -- local_irq_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_disable(); + init_gro_hash(&sd->backlog); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 21a83e26f004..4c42f9b7e238 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -6863,8 +6863,13 @@ nodefer: __kfree_skb(skb); + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ +- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) ++ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { ++#ifndef CONFIG_PREEMPT_RT + smp_call_function_single_async(cpu, &sd->defer_csd); ++#else ++ schedule_work_on(cpu, &sd->defer_work); ++#endif ++ } } - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) -@@ -728,7 +732,8 @@ void intel_pipe_update_end(struct intel_atomic_state *state, - */ - intel_vrr_send_push(new_crtc_state); + static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, +-- +2.51.0 + +From 88b7ea588c0c672523656f85cf67cdbd46f4d89c Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Wed, 7 Aug 2019 18:15:38 +0200 +Subject: [PATCH 016/213] x86: Allow to enable RT + +Allow to select RT. 
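For context, the gating works roughly like the Kconfig sketch below (simplified; the mainline prompt carries additional dependencies such as EXPERT): an architecture opts in by selecting ARCH_SUPPORTS_RT, and only then does the RT preemption model become selectable.

config ARCH_SUPPORTS_RT
	bool

config PREEMPT_RT
	bool "Fully Preemptible Kernel (Real-Time)"
	depends on ARCH_SUPPORTS_RT
	select PREEMPTION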
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Thomas Gleixner
+---
+ arch/x86/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 2b5b7d9a24e9..14cff9f0645f 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -28,6 +28,7 @@ config X86_64
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+ select ARCH_SUPPORTS_PER_VMA_LOCK
++ select ARCH_SUPPORTS_RT
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select HAVE_ARCH_SOFT_DIRTY
+ select MODULES_USE_ELF_RELA
+--
+2.51.0
+
+From d54bd7d039d01ff24a8df761bb05f0c5c22e926c Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Thu, 7 Nov 2019 17:49:20 +0100
+Subject: [PATCH 017/213] x86: Enable RT also on 32bit
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Thomas Gleixner
+---
+ arch/x86/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 14cff9f0645f..14e47444817a 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -28,7 +28,6 @@ config X86_64
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+ select ARCH_SUPPORTS_PER_VMA_LOCK
+- select ARCH_SUPPORTS_RT
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select HAVE_ARCH_SOFT_DIRTY
+ select MODULES_USE_ELF_RELA
+@@ -119,6 +118,7 @@ config X86
+ select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG
+ select ARCH_SUPPORTS_LTO_CLANG
+ select ARCH_SUPPORTS_LTO_CLANG_THIN
++ select ARCH_SUPPORTS_RT
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_MEMTEST
+ select ARCH_USE_QUEUED_RWLOCKS
+--
+2.51.0
+
+From fa1d39c3b74066ba7a81c87e4d97be3f6599ddf0 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Tue, 1 Aug 2023 17:26:48 +0200
+Subject: [PATCH 018/213] sched/rt: Don't try push tasks if there are none.
+
+I have an RT task X at a high priority and cyclictest on each CPU with
+lower priority than X's. If X is active and each CPU wakes its own
+cyclictest thread then it ends in a long rto_push storm.
+A random CPU determines via balance_rt() that the CPU on which X is
+running needs to push tasks. X has the highest priority, cyclictest is
+next in line so there is nothing that can be done since the task with
+the higher priority is not touched.
+
+tell_cpu_to_push() increments rto_loop_next and schedules
+rto_push_irq_work_func() on X's CPU. The other CPUs also increment the
+loop counter and do the same. Once rto_push_irq_work_func() is active it
+does nothing because it has _no_ pushable tasks on its runqueue. Then it
+checks rto_next_cpu() and decides to queue irq_work on the local CPU
+because another CPU requested a push by incrementing the counter.
+
+I have traces where ~30 CPUs request this ~3 times each before it
+finally ends. This greatly increases X's runtime while X isn't making
+much progress.
+
+Teach rto_next_cpu() to only return CPUs which also have tasks on their
+runqueue which can be pushed away. This does not reduce the
+tell_cpu_to_push() invocations (rto_loop_next counter increments) but
+reduces the number of issued rto_push_irq_work_func() invocations if
+nothing can be done. As a result the overloaded CPU is blocked less
+often.
+
+There are still cases where the "same job" is repeated several times
+(for instance the current CPU needs to resched but didn't yet because
+the irq-work is repeated a few times and so the old task remains on the
+CPU) but the majority of requests end in tell_cpu_to_push() before an IPI
+is issued.
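The loop this patch changes looks roughly like the sketch below, heavily abridged from kernel/sched/rt.c for illustration (the rto_loop_next wrap-around handling of the real function is elided). The new has_pushable_tasks() test makes the walk skip CPUs that raised the loop counter but have nothing that could actually be pushed, so no irq_work/IPI is queued for them.

static int rto_next_cpu(struct root_domain *rd)
{
	int cpu;

	for (;;) {
		/* scan the overloaded-CPU mask from the last position */
		cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
		rd->rto_cpu = cpu;
		if (cpu >= nr_cpu_ids)
			break;
		if (!has_pushable_tasks(cpu_rq(cpu)))
			continue;	/* nothing to push here: skip the IPI */
		return cpu;
	}

	rd->rto_cpu = -1;
	return -1;
}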
+
+Reviewed-by: "Steven Rostedt (Google)"
+Link: https://lore.kernel.org/r/20230801152648._y603AS_@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/sched/rt.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index 91b1ee0d81fc..57cfa0b67b79 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -2244,8 +2244,11 @@ static int rto_next_cpu(struct root_domain *rd)
+
+ rd->rto_cpu = cpu;
+
+- if (cpu < nr_cpu_ids)
++ if (cpu < nr_cpu_ids) {
++ if (!has_pushable_tasks(cpu_rq(cpu)))
++ continue;
+ return cpu;
++ }
+
+ rd->rto_cpu = -1;
+
+--
+2.51.0
+
+From 7747b752ba3fc0a068bab5eda50216dff89961a6 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Wed, 1 Dec 2021 17:41:09 +0100
+Subject: [PATCH 019/213] softirq: Use a dedicated thread for timer wakeups.
+
+A timer/hrtimer softirq is raised in-IRQ context. With threaded
+interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd
+for the processing of the softirq.
+Once the ksoftirqd is marked as pending (or is running) it will collect
+all raised softirqs. This in turn means that a softirq which would have
+been processed at the end of the threaded interrupt, which runs at an
+elevated priority, is now moved to ksoftirqd which runs at SCHED_OTHER
+priority and competes with every regular task for CPU resources.
+This introduces long delays on heavily loaded systems and is not desired
+especially if the system is not overloaded by the softirqs.
+
+Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
+timers thread and let it run at the lowest SCHED_FIFO priority.
+RT tasks are woken up from hardirq context so only timer_list timers
+and hrtimers for "regular" tasks are processed here. The higher priority
+ensures that wakeups are performed before scheduling SCHED_OTHER tasks.
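Reduced to its two key pieces, the bookkeeping described here works roughly as in the sketch below (the full diff follows; this is only a condensed view with the hotplug-thread setup omitted): timer softirq bits go into a separate per-CPU word that ksoftirqd never consults, and a dedicated SCHED_FIFO thread consumes them instead.

static DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
static DEFINE_PER_CPU(struct task_struct *, timersd);

static void raise_ktimers_thread(unsigned int nr)
{
	/* deliberately not or_softirq_pending(): keep these bits private */
	__this_cpu_or(pending_timer_softirq, 1 << nr);
}

static void wake_timersd(void)
{
	struct task_struct *tsk = __this_cpu_read(timersd);

	if (tsk)
		wake_up_process(tsk);	/* runs at low SCHED_FIFO priority */
}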
+ +Using a dedicated variable to store the pending softirq bits values +ensure that the timer are not accidentally picked up by ksoftirqd and +other threaded interrupts. +It shouldn't be picked up by ksoftirqd since it runs at lower priority. +However if the timer bits are ORed while a threaded interrupt is +running, then the timer softirq would be performed at higher priority. +The new timer thread will block on the softirq lock before it starts +softirq work. This "race window" isn't closed because while timer thread +is performing the softirq it can get PI-boosted via the softirq lock by +a random force-threaded thread. +The timer thread can pick up pending softirqs from ksoftirqd but only +if the softirq load is high. It is not be desired that the picked up +softirqs are processed at SCHED_FIFO priority under high softirq load +but this can already happen by a PI-boost by a force-threaded interrupt. + +Reported-by: kernel test robot [ static timer_threads ] +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 16 +++++++ + kernel/softirq.c | 92 +++++++++++++++++++++++++++++++++++++-- + kernel/time/hrtimer.c | 4 +- + kernel/time/timer.c | 2 +- + 4 files changed, 108 insertions(+), 6 deletions(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index 2610a7d156da..b0da2976f899 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -609,6 +609,22 @@ extern void __raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); - /* - * If the queue is higher priority than the last -@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - * Even if ELSP[1] is occupied and not worthy - * of timeslices, our queue might be. 
- */ -- spin_unlock(&sched_engine->lock); -+ spin_unlock_irq(&sched_engine->lock); - return; - } - } -@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ++#ifdef CONFIG_PREEMPT_RT ++extern void raise_timer_softirq(void); ++extern void raise_hrtimer_softirq(void); ++ ++#else ++static inline void raise_timer_softirq(void) ++{ ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++static inline void raise_hrtimer_softirq(void) ++{ ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++} ++#endif ++ + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); - if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.sched_engine->lock); -- spin_unlock(&engine->sched_engine->lock); -+ spin_unlock_irq(&engine->sched_engine->lock); - return; /* leave this for another sibling */ - } + static inline struct task_struct *this_cpu_ksoftirqd(void) +diff --git a/kernel/softirq.c b/kernel/softirq.c +index d9e37f3fa130..25d007388f41 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -648,6 +648,29 @@ static inline void tick_irq_exit(void) + #endif + } -@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - */ - sched_engine->queue_priority_hint = queue_prio(sched_engine); - i915_sched_engine_reset_on_empty(sched_engine); -- spin_unlock(&sched_engine->lock); -+ spin_unlock_irq(&sched_engine->lock); ++#ifdef CONFIG_PREEMPT_RT ++static DEFINE_PER_CPU(struct task_struct *, timersd); ++static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++static unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ ++static void wake_timersd(void) ++{ ++ struct task_struct *tsk = __this_cpu_read(timersd); ++ ++ if (tsk) ++ wake_up_process(tsk); ++} ++ ++#else ++ ++static inline void wake_timersd(void) { } ++ ++#endif ++ + static inline void __irq_exit_rcu(void) + { + #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED +@@ -657,8 +680,13 @@ static inline void __irq_exit_rcu(void) + #endif + account_hardirq_exit(current); + preempt_count_sub(HARDIRQ_OFFSET); +- if (!in_interrupt() && local_softirq_pending()) +- invoke_softirq(); ++ if (!in_interrupt()) { ++ if (local_softirq_pending()) ++ invoke_softirq(); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers()) ++ wake_timersd(); ++ } - /* - * We can skip poking the HW if we ended up with exactly the same set -@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - } + tick_irq_exit(); } +@@ -992,12 +1020,70 @@ static struct smp_hotplug_thread softirq_threads = { + .thread_comm = "ksoftirqd/%u", + }; --static void execlists_dequeue_irq(struct intel_engine_cs *engine) --{ -- local_irq_disable(); /* Suspend interrupts across request submission */ -- execlists_dequeue(engine); -- local_irq_enable(); /* flush irq_work (e.g. 
breadcrumb enabling) */ --} -- - static void clear_ports(struct i915_request **ports, int count) ++#ifdef CONFIG_PREEMPT_RT ++static void timersd_setup(unsigned int cpu) ++{ ++ sched_set_fifo_low(current); ++} ++ ++static int timersd_should_run(unsigned int cpu) ++{ ++ return local_pending_timers(); ++} ++ ++static void run_timersd(unsigned int cpu) ++{ ++ unsigned int timer_si; ++ ++ ksoftirqd_run_begin(); ++ ++ timer_si = local_pending_timers(); ++ __this_cpu_write(pending_timer_softirq, 0); ++ or_softirq_pending(timer_si); ++ ++ __do_softirq(); ++ ++ ksoftirqd_run_end(); ++} ++ ++static void raise_ktimers_thread(unsigned int nr) ++{ ++ trace_softirq_raise(nr); ++ __this_cpu_or(pending_timer_softirq, 1 << nr); ++} ++ ++void raise_hrtimer_softirq(void) ++{ ++ raise_ktimers_thread(HRTIMER_SOFTIRQ); ++} ++ ++void raise_timer_softirq(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ raise_ktimers_thread(TIMER_SOFTIRQ); ++ wake_timersd(); ++ local_irq_restore(flags); ++} ++ ++static struct smp_hotplug_thread timer_threads = { ++ .store = &timersd, ++ .setup = timersd_setup, ++ .thread_should_run = timersd_should_run, ++ .thread_fn = run_timersd, ++ .thread_comm = "ktimers/%u", ++}; ++#endif ++ + static __init int spawn_ksoftirqd(void) { - memset_p((void **)ports, NULL, count); -@@ -2478,7 +2471,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) + cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, + takeover_tasklets); + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- ++#ifdef CONFIG_PREEMPT_RT ++ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); ++#endif + return 0; + } + early_initcall(spawn_ksoftirqd); +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 6d9da768604d..1e9cc3abf63e 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1875,7 +1875,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); } - if (!engine->execlists.pending[0]) { -- execlists_dequeue_irq(engine); -+ execlists_dequeue(engine); - start_timeslice(engine); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +@@ -1988,7 +1988,7 @@ void hrtimer_run_queues(void) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); } -diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c -index 13fb8e504..b51fb0c97 100644 ---- a/drivers/gpu/drm/i915/gt/intel_reset.c -+++ b/drivers/gpu/drm/i915/gt/intel_reset.c -@@ -164,13 +164,13 @@ static int i915_do_reset(struct intel_gt *gt, - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - udelay(50); -- err = wait_for_atomic(i915_in_reset(pdev), 50); -+ err = _wait_for_atomic(i915_in_reset(pdev), 50, 0); - - /* Clear the reset request. 
*/ - pci_write_config_byte(pdev, I915_GDRST, 0); - udelay(50); - if (!err) -- err = wait_for_atomic(!i915_in_reset(pdev), 50); -+ err = _wait_for_atomic(!i915_in_reset(pdev), 50, 0); - - return err; + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 63a8ce7177dd..7cad6fe3c035 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -2054,7 +2054,7 @@ static void run_local_timers(void) + if (time_before(jiffies, base->next_expiry)) + return; + } +- raise_softirq(TIMER_SOFTIRQ); ++ raise_timer_softirq(); } -@@ -190,7 +190,7 @@ static int g33_do_reset(struct intel_gt *gt, - struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); -- return wait_for_atomic(g4x_reset_complete(pdev), 50); -+ return _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); - } + /* +-- +2.51.0 + +From 04e647ed1664a13565973cfbb13873d682dc7273 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 5 Apr 2022 03:07:51 +0200 +Subject: [PATCH 020/213] rcutorture: Also force sched priority to timersd on + boosting test. + +ksoftirqd is statically boosted to the priority level right above the +one of rcu_torture_boost() so that timers, which torture readers rely on, +get a chance to run while rcu_torture_boost() is polling. + +However timers processing got split from ksoftirqd into their own kthread +(timersd) that isn't boosted. It has the same SCHED_FIFO low prio as +rcu_torture_boost() and therefore timers can't preempt it and may +starve. + +The issue can be triggered in practice on v5.17.1-rt17 using: + + ./kvm.sh --allcpus --configs TREE04 --duration 10m --kconfig "CONFIG_EXPERT=y CONFIG_PREEMPT_RT=y" + +Fix this with statically boosting timersd just like is done with +ksoftirqd in commit + ea6d962e80b61 ("rcutorture: Judge RCU priority boosting on grace periods, not callbacks") + +Suggested-by: Mel Gorman +Cc: Sebastian Andrzej Siewior +Cc: Thomas Gleixner +Signed-off-by: Frederic Weisbecker +Link: https://lkml.kernel.org/r/20220405010752.1347437-1-frederic@kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 1 + + kernel/rcu/rcutorture.c | 6 ++++++ + kernel/softirq.c | 2 +- + 3 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index b0da2976f899..c84f90a15f48 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -610,6 +610,7 @@ extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); - static int g4x_do_reset(struct intel_gt *gt, -@@ -207,7 +207,7 @@ static int g4x_do_reset(struct intel_gt *gt, + #ifdef CONFIG_PREEMPT_RT ++DECLARE_PER_CPU(struct task_struct *, timersd); + extern void raise_timer_softirq(void); + extern void raise_hrtimer_softirq(void); - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_MEDIA | GRDOM_RESET_ENABLE); -- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); -+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); - if (ret) { - GT_TRACE(gt, "Wait for media reset failed\n"); - goto out; -@@ -215,7 +215,7 @@ static int g4x_do_reset(struct intel_gt *gt, - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); -- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); -+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); - if (ret) { - GT_TRACE(gt, "Wait for render reset failed\n"); - goto out; -@@ -785,9 +785,7 @@ int __intel_gt_reset(struct 
intel_gt *gt, intel_engine_mask_t engine_mask) - reset_mask = wa_14015076503_start(gt, engine_mask, !retry); - - GT_TRACE(gt, "engine_mask=%x\n", reset_mask); -- preempt_disable(); - ret = reset(gt, reset_mask, retry); -- preempt_enable(); - - wa_14015076503_end(gt, reset_mask); +diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c +index 46612fb15fc6..f893f4cfdd38 100644 +--- a/kernel/rcu/rcutorture.c ++++ b/kernel/rcu/rcutorture.c +@@ -2409,6 +2409,12 @@ static int rcutorture_booster_init(unsigned int cpu) + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#ifdef CONFIG_PREEMPT_RT ++ t = per_cpu(timersd, cpu); ++ WARN_ON_ONCE(!t); ++ sp.sched_priority = 2; ++ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#endif } -diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h -index 8dc291ff0..5b8d084c9 100644 ---- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h -+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h -@@ -317,7 +317,7 @@ static inline int intel_guc_send_busy_loop(struct intel_guc *guc, - { - int err; - unsigned int sleep_period_ms = 1; -- bool not_atomic = !in_atomic() && !irqs_disabled(); -+ bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth(); - - /* - * FIXME: Have caller pass in if we are in an atomic context to avoid -diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c -index f59081066..014d02029 100644 ---- a/drivers/gpu/drm/i915/i915_request.c -+++ b/drivers/gpu/drm/i915/i915_request.c -@@ -609,7 +609,6 @@ bool __i915_request_submit(struct i915_request *request) - - RQ_TRACE(request, "\n"); - -- GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->sched_engine->lock); - - /* -@@ -718,7 +717,6 @@ void __i915_request_unsubmit(struct i915_request *request) - */ - RQ_TRACE(request, "\n"); -- GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->sched_engine->lock); + /* Don't allow time recalculation while creating a new task. */ +diff --git a/kernel/softirq.c b/kernel/softirq.c +index 25d007388f41..83792463e9de 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -649,7 +649,7 @@ static inline void tick_irq_exit(void) + } - /* -diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h -index ce1cbee1b..3c51620d0 100644 ---- a/drivers/gpu/drm/i915/i915_trace.h -+++ b/drivers/gpu/drm/i915/i915_trace.h -@@ -6,6 +6,10 @@ - #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) - #define _I915_TRACE_H_ + #ifdef CONFIG_PREEMPT_RT +-static DEFINE_PER_CPU(struct task_struct *, timersd); ++DEFINE_PER_CPU(struct task_struct *, timersd); + static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + + static unsigned int local_pending_timers(void) +-- +2.51.0 + +From 127a53e2deae0f6ab4c8b4b3553cfd6f5f55140e Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 5 Apr 2022 03:07:52 +0200 +Subject: [PATCH 021/213] tick: Fix timer storm since introduction of timersd + +If timers are pending while the tick is reprogrammed on nohz_mode, the +next expiry is not armed to fire now, it is delayed one jiffy forward +instead so as not to raise an inextinguishable timer storm with such +scenario: + +1) IRQ triggers and queue a timer +2) ksoftirqd() is woken up +3) IRQ tail: timer is reprogrammed to fire now +4) IRQ exit +5) TIMER interrupt +6) goto 3) + +...all that until we finally reach ksoftirqd. 
+ +Unfortunately we are checking the wrong softirq vector bitmask since +timersd kthread has split from ksoftirqd. Timers now have their own +vector state field that must be checked separately. As a result, the +old timer storm is back. This shows up early on boot with extremely long +initcalls: + + [ 333.004807] initcall dquot_init+0x0/0x111 returned 0 after 323822879 usecs + +and the cause is uncovered with the right trace events showing just +10 microseconds between ticks (~100 000 Hz): + +|swapper/-1 1dn.h111 60818582us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415486608 +|swapper/-1 1dn.h111 60818592us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415496082 +|swapper/-1 1dn.h111 60818601us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415505550 + +Fix this by checking the right timer vector state from the nohz code. + +Signed-off-by: Frederic Weisbecker +Cc: Mel Gorman +Cc: Sebastian Andrzej Siewior +Cc: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org +--- + include/linux/interrupt.h | 12 ++++++++++++ + kernel/softirq.c | 7 +------ + kernel/time/tick-sched.c | 2 +- + 3 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index c84f90a15f48..72e300db1833 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -611,9 +611,16 @@ extern void raise_softirq(unsigned int nr); -+#ifdef CONFIG_PREEMPT_RT -+#define NOTRACE -+#endif + #ifdef CONFIG_PREEMPT_RT + DECLARE_PER_CPU(struct task_struct *, timersd); ++DECLARE_PER_CPU(unsigned long, pending_timer_softirq); + - #include - #include - #include -@@ -322,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add, - TP_ARGS(rq) - ); - --#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) -+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) - DEFINE_EVENT(i915_request, i915_request_guc_submit, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) -diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h -index c61066498..48e19e55d 100644 ---- a/drivers/gpu/drm/i915/i915_utils.h -+++ b/drivers/gpu/drm/i915/i915_utils.h -@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) - #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + extern void raise_timer_softirq(void); + extern void raise_hrtimer_softirq(void); - /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. 
*/ --#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) -+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) - # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) ++static inline unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ #else - # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -diff --git a/drivers/tty/serial/21285.c b/drivers/tty/serial/21285.c -index d756fcc88..4de0c975e 100644 ---- a/drivers/tty/serial/21285.c -+++ b/drivers/tty/serial/21285.c -@@ -185,14 +185,14 @@ static void serial21285_break_ctl(struct uart_port *port, int break_state) - unsigned long flags; - unsigned int h_lcr; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - h_lcr = *CSR_H_UBRLCR; - if (break_state) - h_lcr |= H_UBRLCR_BREAK; - else - h_lcr &= ~H_UBRLCR_BREAK; - *CSR_H_UBRLCR = h_lcr; -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); + static inline void raise_timer_softirq(void) + { +@@ -624,6 +631,11 @@ static inline void raise_hrtimer_softirq(void) + { + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return local_softirq_pending(); ++} + #endif - static int serial21285_startup(struct uart_port *port) -@@ -272,7 +272,7 @@ serial21285_set_termios(struct uart_port *port, struct ktermios *termios, - if (port->fifosize) - h_lcr |= H_UBRLCR_FIFO; + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); +diff --git a/kernel/softirq.c b/kernel/softirq.c +index 83792463e9de..0959e21d5fa7 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -650,12 +650,7 @@ static inline void tick_irq_exit(void) -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); + #ifdef CONFIG_PREEMPT_RT + DEFINE_PER_CPU(struct task_struct *, timersd); +-static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); +- +-static unsigned int local_pending_timers(void) +-{ +- return __this_cpu_read(pending_timer_softirq); +-} ++DEFINE_PER_CPU(unsigned long, pending_timer_softirq); - /* - * Update the per-port timeout. 

 static void wake_timersd(void)
 {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 55cbc49f70d1..1a0ed106b192 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -795,7 +795,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 static inline bool local_timer_softirq_pending(void)
 {
-	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+	return local_pending_timers() & BIT(TIMER_SOFTIRQ);
 }
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
--
2.51.0

From 807c0bd22140540e9d04c30cbe22cb6bfb71453f Mon Sep 17 00:00:00 2001
From: Junxiao Chang
Date: Mon, 20 Feb 2023 09:12:20 +0100
Subject: [PATCH 022/213] softirq: Wake ktimers thread also in softirq.

If the hrtimer is raised while a softirq is processed then it does not
wake the corresponding ktimers thread. This is due to the optimisation in the
irq-exit path which is also used to wake the ktimers thread. For the other
softirqs, this is okay because the additional softirq bits will be handled by
the currently running softirq handler.
The timer related softirq bits are added to a different variable and rely on
the ktimers thread.
As a consequence the wake-up of ktimersd is delayed until the next timer tick.

Always wake the ktimers thread if a timer related softirq is pending.

Reported-by: Peh, Hock Zhang
Signed-off-by: Junxiao Chang
Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/softirq.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0959e21d5fa7..a41505107731 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -675,13 +675,12 @@ static inline void __irq_exit_rcu(void)
 #endif
 	account_hardirq_exit(current);
 	preempt_count_sub(HARDIRQ_OFFSET);
-	if (!in_interrupt()) {
-		if (local_softirq_pending())
-			invoke_softirq();
+	if (!in_interrupt() && local_softirq_pending())
+		invoke_softirq();
 
-		if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers())
-			wake_timersd();
-	}
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() &&
+	    !(in_nmi() | in_hardirq()))
+		wake_timersd();
 
 	tick_irq_exit();
 }
--
2.51.0

From 72619b2bbb137006ca6b82ce895a0f4a6023cd83 Mon Sep 17 00:00:00 2001
From: Mike Galbraith
Date: Thu, 31 Mar 2016 04:08:28 +0200
Subject: [PATCH 023/213] zram: Replace bit spinlocks with spinlock_t for
 PREEMPT_RT.

The bit spinlock disables preemption. The spinlock_t lock becomes a sleeping
lock on PREEMPT_RT and it cannot be acquired in this context. In this locked
+section, zs_free() acquires a zs_pool::lock, and there is access to
+zram::wb_limit_lock.
+
+Use a spinlock_t on PREEMPT_RT for locking and set/clear the ZRAM_LOCK bit
+after the lock has been acquired/dropped.
+
+Signed-off-by: Mike Galbraith
+Signed-off-by: Sebastian Andrzej Siewior
+Link: https://lkml.kernel.org/r/YqIbMuHCPiQk+Ac2@linutronix.de
+Link: https://lore.kernel.org/20230323161830.jFbWCosd@linutronix.de
+---
+ drivers/block/zram/zram_drv.c | 37 +++++++++++++++++++++++++++++++++++
+ drivers/block/zram/zram_drv.h | 3 +++
+ 2 files changed, 40 insertions(+)
+
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index 44cf0e51d7db..adb645de064d 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -57,6 +57,41 @@ static void zram_free_page(struct zram *zram, size_t index);
+ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
+			  struct bio *parent);
+
++#ifdef CONFIG_PREEMPT_RT
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
++{
++	size_t index;
++
++	for (index = 0; index < num_pages; index++)
++		spin_lock_init(&zram->table[index].lock);
++}
++
++static int zram_slot_trylock(struct zram *zram, u32 index)
++{
++	int ret;
++
++	ret = spin_trylock(&zram->table[index].lock);
++	if (ret)
++		__set_bit(ZRAM_LOCK, &zram->table[index].flags);
++	return ret;
++}
++
++static void zram_slot_lock(struct zram *zram, u32 index)
++{
++	spin_lock(&zram->table[index].lock);
++	__set_bit(ZRAM_LOCK, &zram->table[index].flags);
++}
++
++static void zram_slot_unlock(struct zram *zram, u32 index)
++{
++	__clear_bit(ZRAM_LOCK, &zram->table[index].flags);
++	spin_unlock(&zram->table[index].lock);
++}
++
++#else
++
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
++
+ static int zram_slot_trylock(struct zram *zram, u32 index)
+ {
+ 	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
+@@ -71,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
+ {
+ 	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+ }
++#endif
+
+ static inline bool init_done(struct zram *zram)
+ {
+@@ -1268,6 +1304,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
+
+ 	if (!huge_class_size)
+ 		huge_class_size = zs_huge_class_size(zram->mem_pool);
++	zram_meta_init_table_locks(zram, num_pages);
+ 	return true;
+ }
+diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
+index 35e322144629..4e0c7c6d4813 100644
+--- a/drivers/block/zram/zram_drv.h
++++ b/drivers/block/zram/zram_drv.h
+@@ -69,6 +69,9 @@ struct zram_table_entry {
+ 		unsigned long element;
+ 	};
+ 	unsigned long flags;
++#ifdef CONFIG_PREEMPT_RT
++	spinlock_t lock;
++#endif
+ #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
+ 	ktime_t ac_time;
+ #endif
+--
+2.51.0
+
+From 764d13090cd412e3ed8afe84421d77281547c92b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Wed, 8 Mar 2023 16:29:38 +0100
+Subject: [PATCH 024/213] preempt: Put preempt_enable() within an
+ instrumentation*() section.
+
+Callers of preempt_enable() can be within a noinstr section leading to:
+| vmlinux.o: warning: objtool: native_sched_clock+0x97: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: kvm_clock_read+0x22: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: local_clock+0xb4: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: enter_from_user_mode+0xea: call to preempt_schedule_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: syscall_enter_from_user_mode+0x140: call to preempt_schedule_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare+0xf2: call to preempt_schedule_thunk() leaves .noinstr.text section
+| vmlinux.o: warning: objtool: irqentry_enter_from_user_mode+0xea: call to preempt_schedule_thunk() leaves .noinstr.text section
+
+Signed-off-by: Sebastian Andrzej Siewior
+Link: https://lore.kernel.org/r/20230309072724.3F6zRkvw@linutronix.de
+---
+ include/linux/preempt.h | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/preempt.h b/include/linux/preempt.h
+index 9aa6358a1a16..cd16f0330fba 100644
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -230,15 +230,21 @@ do { \
+ #define preempt_enable() \
+ do { \
+ 	barrier(); \
+-	if (unlikely(preempt_count_dec_and_test())) \
++	if (unlikely(preempt_count_dec_and_test())) { \
++		instrumentation_begin(); \
+ 		__preempt_schedule(); \
++		instrumentation_end(); \
++	} \
+ } while (0)
+
+ #define preempt_enable_notrace() \
+ do { \
+ 	barrier(); \
+-	if (unlikely(__preempt_count_dec_and_test())) \
++	if (unlikely(__preempt_count_dec_and_test())) { \
++		instrumentation_begin(); \
+ 		__preempt_schedule_notrace(); \
++		instrumentation_end(); \
++	} \
+ } while (0)
+
+ #define preempt_check_resched() \
+--
+2.51.0
+
+From c9863728e966402287407e8dcab333966377c76f Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Fri, 4 Aug 2023 13:30:37 +0200
+Subject: [PATCH 025/213] sched/core: Provide a method to check if a task is
+ PI-boosted.
+
+Provide a method to check if a task inherited the priority from another
+task. This happens if a task owns a lock which is requested by a task
+with higher priority. This can be used as a hint to add a preemption
+point to the critical section.
+
+Provide a function which reports true if the task is PI-boosted.
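
The helper introduced by this patch is a pure query; how a caller might use it as a preemption-point hint is sketched below. This is a hypothetical consumer with made-up names (my_process_pending(), my_do_work()), not part of the series; the in-tree user is the timer softirq change a few patches further down.

#include <linux/sched.h>

/* Made-up helper: processes one queued item, returns true while more
 * work is pending. */
static bool my_do_work(void);

static void my_process_pending(void)
{
	while (my_do_work()) {
		/*
		 * A true return means a higher-priority waiter lent us
		 * its priority; stop batching so the boosted critical
		 * section ends as early as possible.
		 */
		if (task_is_pi_boosted(current))
			break;
	}
}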
+
+Link: https://lore.kernel.org/r/20230804113039.419794-2-bigeasy@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/sched.h | 1 +
+ kernel/sched/core.c | 15 +++++++++++++++
+ 2 files changed, 16 insertions(+)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 87f1bf06857e..6f27f5e1effe 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1910,6 +1910,7 @@ static inline int dl_task_check_affinity(struct task_struct *p, const struct cpu
+ }
+ #endif
+
++extern bool task_is_pi_boosted(const struct task_struct *p);
+ extern int yield_to(struct task_struct *p, bool preempt);
+ extern void set_user_nice(struct task_struct *p, long nice);
+ extern int task_prio(const struct task_struct *p);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 1b7afbd306c3..f2e784d966b2 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8918,6 +8918,21 @@ static inline void preempt_dynamic_init(void) { }
+
+ #endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
+
++/*
++ * task_is_pi_boosted - Check if task has been PI boosted.
++ * @p: Task to check.
++ *
++ * Return true if task is subject to priority inheritance.
++ */
++bool task_is_pi_boosted(const struct task_struct *p)
++{
++	int prio = p->prio;
++
++	if (!rt_prio(prio))
++		return false;
++	return prio != p->normal_prio;
++}
++
+ /**
+  * yield - yield the current processor to other threads.
+  *
+--
+2.51.0
+
+From 3ab657b4d08ef776fb36356d61e1066a85e3e9a5 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Fri, 4 Aug 2023 13:30:38 +0200
+Subject: [PATCH 026/213] softirq: Add function to preempt serving softirqs.
+
+Add functionality for the softirq handler to preempt its current work
+if needed. The softirq core has no particular state. It reads and resets
+the pending softirq bits and then processes one after the other.
+It can already be preempted while it invokes a certain softirq handler.
+
+By enabling the BH the softirq core releases the per-CPU bh lock which
+serializes all softirq handlers. It is safe to do so as long as the code
+does not expect any serialisation in between. A typical scenario would be
+after the invocation of a callback, where no state needs to be preserved
+before the next callback is invoked.
+
+Add functionality to preempt the serving softirqs.
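
A sketch of where such a preemption point is legal: between two callbacks, with no BH-serialized state carried across the call. This is a hypothetical softirq action with made-up names (my_fetch_item(), my_handle_item()); the helper itself, softirq_preempt(), is added in the hunks that follow.

#include <linux/bottom_half.h>
#include <linux/interrupt.h>
#include <linux/sched.h>

/* Made-up per-item work for an imaginary softirq vector. */
static bool my_fetch_item(void);
static void my_handle_item(void);

static void my_softirq_action(struct softirq_action *h)
{
	while (my_fetch_item()) {
		my_handle_item();
		/*
		 * Legal only between callbacks: nothing serialized by
		 * the per-CPU BH lock may be held across this call.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		    task_is_pi_boosted(current))
			softirq_preempt();
	}
}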
+
+Link: https://lore.kernel.org/r/20230804113039.419794-3-bigeasy@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/bottom_half.h | 2 ++
+ kernel/softirq.c | 13 +++++++++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
+index fc53e0ad56d9..448bbef47456 100644
+--- a/include/linux/bottom_half.h
++++ b/include/linux/bottom_half.h
+@@ -35,8 +35,10 @@ static inline void local_bh_enable(void)
+
+ #ifdef CONFIG_PREEMPT_RT
+ extern bool local_bh_blocked(void);
++extern void softirq_preempt(void);
+ #else
+ static inline bool local_bh_blocked(void) { return false; }
++static inline void softirq_preempt(void) { }
+ #endif
+
+ #endif /* _LINUX_BH_H */
+diff --git a/kernel/softirq.c b/kernel/softirq.c
+index a41505107731..a7324be257d8 100644
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -263,6 +263,19 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
+ }
+ EXPORT_SYMBOL(__local_bh_enable_ip);
+
++void softirq_preempt(void)
++{
++	if (WARN_ON_ONCE(!preemptible()))
++		return;
++
++	if (WARN_ON_ONCE(__this_cpu_read(softirq_ctrl.cnt) != SOFTIRQ_OFFSET))
++		return;
++
++	__local_bh_enable(SOFTIRQ_OFFSET, true);
++	/* preemption point */
++	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
++}
++
+ /*
+  * Invoked from ksoftirqd_run() outside of the interrupt disabled section
+  * to acquire the per CPU local lock for reentrancy protection.
+--
+2.51.0
+
+From 6d794f139fc6c37a579600221a377dcfbbbbec2e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Fri, 4 Aug 2023 13:30:39 +0200
+Subject: [PATCH 027/213] time: Allow to preempt after a callback.
+
+The TIMER_SOFTIRQ handler invokes timer callbacks of the expired timers.
+Before each invocation the timer_base::lock is dropped. The only lock
+that is still held is the timer_base::expiry_lock and the per-CPU
+bh-lock as part of local_bh_disable(). The former is released as part
+of lockup prevention if the timer is preempted by the caller which is
+waiting for its completion.
+
+Both locks are already released as part of timer_sync_wait_running().
+This can be extended by also releasing the bh-lock. The timer core does
+not rely on any state that is serialized by the bh-lock. The timer
+callback expects the bh-state to be serialized by the lock but there is
+no need to keep state synchronized while invoking multiple callbacks.
+
+Preempt handling softirqs and release all locks after a timer invocation
+if the current task has inherited priority.
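
For orientation, the call site modified in the next hunk sits in the timer expiry path, whose shape is roughly the following. This is an abridged sketch, not verbatim kernel code: detaching, flag handling and the irqsave variants are omitted.

/* Abridged sketch of the expiry loop around timer_sync_wait_running(). */
static void expire_timers_sketch(struct timer_base *base,
				 struct hlist_head *head)
{
	while (!hlist_empty(head)) {
		struct timer_list *timer = hlist_entry(head->first,
						struct timer_list, entry);

		base->running_timer = timer;

		raw_spin_unlock_irq(&base->lock);
		timer->function(timer);	/* callback, base->lock not held */
		raw_spin_lock_irq(&base->lock);

		/* After this patch: may drop every lock and preempt. */
		timer_sync_wait_running(base);
	}
}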
+ +Link: https://lore.kernel.org/r/20230804113039.419794-4-bigeasy@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/timer.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 7cad6fe3c035..b3fbe97d1e34 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1470,9 +1470,16 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) + */ + static void timer_sync_wait_running(struct timer_base *base) + { +- if (atomic_read(&base->timer_waiters)) { ++ bool need_preempt; ++ ++ need_preempt = task_is_pi_boosted(current); ++ if (need_preempt || atomic_read(&base->timer_waiters)) { + raw_spin_unlock_irq(&base->lock); + spin_unlock(&base->expiry_lock); ++ ++ if (need_preempt) ++ softirq_preempt(); ++ + spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); + } +-- +2.51.0 + +From fca63332c46fdee7526a6cddf82fe6037190daaf Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:19 +0206 +Subject: [PATCH 028/213] serial: core: Use lock wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
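
The wrappers themselves live in include/linux/serial_core.h and, per the commit message, contain nothing but the spin_[un]lock*() calls at this stage. A representative pair, sketched to match that description:

#include <linux/serial_core.h>
#include <linux/spinlock.h>

static inline void uart_port_lock_irqsave(struct uart_port *up,
					  unsigned long *flags)
{
	spin_lock_irqsave(&up->lock, *flags);
}

static inline void uart_port_unlock_irqrestore(struct uart_port *up,
					       unsigned long flags)
{
	spin_unlock_irqrestore(&up->lock, flags);
}

The irqsave variant takes the flags by pointer so that spin_lock_irqsave(), a macro that writes to its argument, can hide behind a function-call interface. That is why every conversion in this series turns "flags" into "&flags" on the lock side while the unlock side stays unchanged.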
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ilpo Järvinen +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-3-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/serial_core.h | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index 052df85dfd59..71d925e8a79b 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -1078,14 +1078,14 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) + u8 sysrq_ch; + + if (!port->has_sysrq) { +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +@@ -1097,14 +1097,14 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port + u8 sysrq_ch; + + if (!port->has_sysrq) { +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +@@ -1120,12 +1120,12 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) + } + static inline void uart_unlock_and_check_sysrq(struct uart_port *port) + { +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + } + static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) + { +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ + +-- +2.51.0 + +From 7bb9f688b9361d2814eb8bfcfa31810b2c7954d6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:20 +0206 +Subject: [PATCH 029/213] serial: 21285: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
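
The sysrq helpers converted above record the magic character under the port lock but run handle_sysrq() only after the lock has been dropped, since the sysrq handler may print or take other locks. A hypothetical receive ISR (made-up my_* names) wires them together like this:

#include <linux/interrupt.h>
#include <linux/serial_core.h>

/* Made-up hardware accessors for an imaginary UART. */
static bool my_rx_ready(struct uart_port *port);
static u8 my_rx_byte(struct uart_port *port);
static void my_push_char(struct uart_port *port, u8 ch);

static irqreturn_t my_uart_isr(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;

	uart_port_lock(port);
	while (my_rx_ready(port)) {
		u8 ch = my_rx_byte(port);

		if (uart_prepare_sysrq_char(port, ch))
			continue;	/* consumed as a sysrq character */
		my_push_char(port, ch);
	}
	/* Drops the port lock first, then invokes handle_sysrq(). */
	uart_unlock_and_check_sysrq(port);
	return IRQ_HANDLED;
}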
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-4-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/21285.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/21285.c b/drivers/tty/serial/21285.c +index d756fcc884cb..4de0c975ebdc 100644 +--- a/drivers/tty/serial/21285.c ++++ b/drivers/tty/serial/21285.c +@@ -185,14 +185,14 @@ static void serial21285_break_ctl(struct uart_port *port, int break_state) + unsigned long flags; + unsigned int h_lcr; + +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + h_lcr = *CSR_H_UBRLCR; + if (break_state) + h_lcr |= H_UBRLCR_BREAK; + else + h_lcr &= ~H_UBRLCR_BREAK; + *CSR_H_UBRLCR = h_lcr; +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + + static int serial21285_startup(struct uart_port *port) +@@ -272,7 +272,7 @@ serial21285_set_termios(struct uart_port *port, struct ktermios *termios, + if (port->fifosize) + h_lcr |= H_UBRLCR_FIFO; + +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + + /* + * Update the per-port timeout. +@@ -309,7 +309,7 @@ serial21285_set_termios(struct uart_port *port, struct ktermios *termios, + *CSR_H_UBRLCR = h_lcr; + *CSR_UARTCON = 1; + +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + + static const char *serial21285_type(struct uart_port *port) +-- +2.51.0 + +From 850bb084ec650b11964dd3eea5fef8fcd517ca4d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:21 +0206 +Subject: [PATCH 030/213] serial: 8250_aspeed_vuart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-5-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_aspeed_vuart.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c +index 4a9e71b2dbbc..021949f252f8 100644 +--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c ++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c +@@ -288,9 +288,9 @@ static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + __aspeed_vuart_set_throttle(up, throttle); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + + static void aspeed_vuart_throttle(struct uart_port *port) +@@ -340,7 +340,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) + if (iir & UART_IIR_NO_INT) + return 0; + +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + + lsr = serial_port_in(port, UART_LSR); + +-- +2.51.0 + +From 31cfb4adc7912e77ec4d0f3eea0c7a9d043a612e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:22 +0206 +Subject: [PATCH 031/213] serial: 8250_bcm7271: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-6-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_bcm7271.c | 28 +++++++++++++------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c -index 9afd5979c..db23b3a02 100644 +index 9afd5979c9e0..db23b3a02aef 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -567,7 +567,7 @@ static irqreturn_t brcmuart_isr(int irq, void *dev_id) @@ -1510,8 +2845,54 @@ index 9afd5979c..db23b3a02 100644 } return 0; +-- +2.51.0 + +From 5de5f3ead4ee5dc513b0aadae6047f7bcc199dab Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:23 +0206 +Subject: [PATCH 032/213] serial: 8250: Use port lock wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
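
One hunk in this conversion is not a plain lock call: serial8250_console_write() keeps its existing oops-time behaviour, where the lock may only be tried. The pattern, abridged from that hunk with a made-up function name:

#include <linux/printk.h>
#include <linux/serial_core.h>

static void my_console_write(struct uart_port *port, const char *s,
			     unsigned int count)
{
	unsigned long flags;
	int locked = 1;

	if (oops_in_progress)
		/* Never spin on the port lock while crashing. */
		locked = uart_port_trylock_irqsave(port, &flags);
	else
		uart_port_lock_irqsave(port, &flags);

	/* ... emit s[0..count) to the hardware ... */

	if (locked)
		uart_port_unlock_irqrestore(port, flags);
}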
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ilpo Järvinen +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-7-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_core.c | 12 ++-- + drivers/tty/serial/8250/8250_port.c | 98 ++++++++++++++--------------- + 2 files changed, 55 insertions(+), 55 deletions(-) + diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c -index c536028e9..efb6f1c19 100644 +index c536028e92dc..677b4aaf1f0c 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -259,7 +259,7 @@ static void serial8250_backup_timeout(struct timer_list *t) @@ -1532,349 +2913,905 @@ index c536028e9..efb6f1c19 100644 /* Standard timer interval plus 0.2s to keep the port running */ mod_timer(&up->timer, -@@ -592,6 +592,7 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) +@@ -992,11 +992,11 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) + struct uart_port *port = &up->port; + unsigned long flags; - #ifdef CONFIG_SERIAL_8250_CONSOLE +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; + serial_out(up, UART_IER, up->ier); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } -+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE - static void univ8250_console_write(struct console *co, const char *s, - unsigned int count) - { -@@ -599,6 +600,37 @@ static void univ8250_console_write(struct console *co, const char *s, + /** +@@ -1197,9 +1197,9 @@ void serial8250_unregister_port(int line) + if (uart->em485) { + unsigned long flags; - serial8250_console_write(up, s, count); - } -+#else -+static bool univ8250_console_write_atomic(struct console *co, -+ struct nbcon_write_context *wctxt) -+{ -+ struct uart_8250_port *up = &serial8250_ports[co->index]; -+ -+ return serial8250_console_write_atomic(up, wctxt); -+} -+ -+static bool univ8250_console_write_thread(struct console *co, -+ struct nbcon_write_context *wctxt) -+{ -+ struct uart_8250_port *up = &serial8250_ports[co->index]; -+ -+ return serial8250_console_write_thread(up, wctxt); -+} -+ -+static void univ8250_console_driver_enter(struct console *con, unsigned long *flags) -+{ -+ struct uart_port *up = &serial8250_ports[con->index].port; -+ -+ __uart_port_lock_irqsave(up, flags); -+} -+ -+static void univ8250_console_driver_exit(struct console *con, unsigned long flags) -+{ -+ struct uart_port *up = &serial8250_ports[con->index].port; -+ -+ __uart_port_unlock_irqrestore(up, flags); -+} -+#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */ +- spin_lock_irqsave(&uart->port.lock, flags); ++ uart_port_lock_irqsave(&uart->port, &flags); + serial8250_em485_destroy(uart); +- spin_unlock_irqrestore(&uart->port.lock, flags); ++ uart_port_unlock_irqrestore(&uart->port, flags); + } - static int univ8250_console_setup(struct console *co, char *options) + uart_remove_one_port(&serial8250_reg, &uart->port); +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index 23aed9e89e30..6935bbf8ea28 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -689,7 +689,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + + if (p->capabilities & UART_CAP_SLEEP) { + /* Synchronize UART_IER access 
against the console. */ +- spin_lock_irq(&p->port.lock); ++ uart_port_lock_irq(&p->port); + if (p->capabilities & UART_CAP_EFR) { + lcr = serial_in(p, UART_LCR); + efr = serial_in(p, UART_EFR); +@@ -703,7 +703,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial_out(p, UART_EFR, efr); + serial_out(p, UART_LCR, lcr); + } +- spin_unlock_irq(&p->port.lock); ++ uart_port_unlock_irq(&p->port); + } + + serial8250_rpm_put(p); +@@ -746,9 +746,9 @@ static void enable_rsa(struct uart_8250_port *up) { -@@ -698,12 +730,20 @@ static int univ8250_console_match(struct console *co, char *name, int idx, + if (up->port.type == PORT_RSA) { + if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { +- spin_lock_irq(&up->port.lock); ++ uart_port_lock_irq(&up->port); + __enable_rsa(up); +- spin_unlock_irq(&up->port.lock); ++ uart_port_unlock_irq(&up->port); + } + if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) + serial_out(up, UART_RSA_FRR, 0); +@@ -768,7 +768,7 @@ static void disable_rsa(struct uart_8250_port *up) - static struct console univ8250_console = { - .name = "ttyS", -+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE - .write = univ8250_console_write, -+ .flags = CON_PRINTBUFFER | CON_ANYTIME, -+#else -+ .write_atomic = univ8250_console_write_atomic, -+ .write_thread = univ8250_console_write_thread, -+ .driver_enter = univ8250_console_driver_enter, -+ .driver_exit = univ8250_console_driver_exit, -+ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON, -+#endif - .device = uart_console_device, - .setup = univ8250_console_setup, - .exit = univ8250_console_exit, - .match = univ8250_console_match, -- .flags = CON_PRINTBUFFER | CON_ANYTIME, - .index = -1, - .data = &serial8250_reg, - }; -@@ -992,11 +1032,11 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) - struct uart_port *port = &up->port; - unsigned long flags; + if (up->port.type == PORT_RSA && + up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { +- spin_lock_irq(&up->port.lock); ++ uart_port_lock_irq(&up->port); + mode = serial_in(up, UART_RSA_MSR); + result = !(mode & UART_RSA_MSR_FIFO); +@@ -781,7 +781,7 @@ static void disable_rsa(struct uart_8250_port *up) + + if (result) + up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; +- spin_unlock_irq(&up->port.lock); ++ uart_port_unlock_irq(&up->port); + } + } + #endif /* CONFIG_SERIAL_8250_RSA */ +@@ -1172,7 +1172,7 @@ static void autoconfig(struct uart_8250_port *up) + * + * Synchronize UART_IER access against the console. 
+ */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - up->ier |= UART_IER_RLSI | UART_IER_RDI; - up->port.read_status_mask |= UART_LSR_DR; - serial_out(up, UART_IER, up->ier); + + up->capabilities = 0; + up->bugs = 0; +@@ -1211,7 +1211,7 @@ static void autoconfig(struct uart_8250_port *up) + /* + * We failed; there's nothing here + */ +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", + scratch2, scratch3); + goto out; +@@ -1235,7 +1235,7 @@ static void autoconfig(struct uart_8250_port *up) + status1 = serial_in(up, UART_MSR) & UART_MSR_STATUS_BITS; + serial8250_out_MCR(up, save_mcr); + if (status1 != (UART_MSR_DCD | UART_MSR_CTS)) { +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + DEBUG_AUTOCONF("LOOP test failed (%02x) ", + status1); + goto out; +@@ -1304,7 +1304,7 @@ static void autoconfig(struct uart_8250_port *up) + serial8250_clear_IER(up); + + out_unlock: - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - } - /** -@@ -1197,9 +1237,9 @@ void serial8250_unregister_port(int line) - if (uart->em485) { - unsigned long flags; + /* + * Check if the device is a Fintek F81216A +@@ -1341,9 +1341,9 @@ static void autoconfig_irq(struct uart_8250_port *up) + probe_irq_off(probe_irq_on()); + save_mcr = serial8250_in_MCR(up); + /* Synchronize UART_IER access against the console. */ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + save_ier = serial_in(up, UART_IER); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + serial8250_out_MCR(up, UART_MCR_OUT1 | UART_MCR_OUT2); -- spin_lock_irqsave(&uart->port.lock, flags); -+ uart_port_lock_irqsave(&uart->port, &flags); - serial8250_em485_destroy(uart); -- spin_unlock_irqrestore(&uart->port.lock, flags); -+ uart_port_unlock_irqrestore(&uart->port, flags); + irqs = probe_irq_on(); +@@ -1356,9 +1356,9 @@ static void autoconfig_irq(struct uart_8250_port *up) + UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); } + /* Synchronize UART_IER access against the console. */ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + serial_out(up, UART_IER, UART_IER_ALL_INTR); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + serial_in(up, UART_LSR); + serial_in(up, UART_RX); + serial_in(up, UART_IIR); +@@ -1369,9 +1369,9 @@ static void autoconfig_irq(struct uart_8250_port *up) - uart_remove_one_port(&serial8250_reg, &uart->port); -diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c -index 7f2303781..cf51976ff 100644 ---- a/drivers/tty/serial/8250/8250_dma.c -+++ b/drivers/tty/serial/8250/8250_dma.c -@@ -22,7 +22,7 @@ static void __dma_tx_complete(void *param) - dma_sync_single_for_cpu(dma->txchan->device->dev, dma->tx_addr, - UART_XMIT_SIZE, DMA_TO_DEVICE); + serial8250_out_MCR(up, save_mcr); + /* Synchronize UART_IER access against the console. 
*/ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + serial_out(up, UART_IER, save_ier); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + + if (port->flags & UPF_FOURPORT) + outb_p(save_ICP, ICP); +@@ -1436,13 +1436,13 @@ static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) + unsigned long flags; + serial8250_rpm_get(p); - spin_lock_irqsave(&p->port.lock, flags); + uart_port_lock_irqsave(&p->port, &flags); - - dma->tx_running = 0; - -@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *param) - if (ret || !dma->tx_running) - serial8250_set_THRI(p); - + if (em485->active_timer == &em485->stop_tx_timer) { + p->rs485_stop_tx(p); + em485->active_timer = NULL; + em485->tx_stopped = true; + } - spin_unlock_irqrestore(&p->port.lock, flags); + uart_port_unlock_irqrestore(&p->port, flags); - } + serial8250_rpm_put(p); - static void __dma_rx_complete(struct uart_8250_port *p) -@@ -70,7 +70,7 @@ static void dma_rx_complete(void *param) - struct uart_8250_dma *dma = p->dma; + return HRTIMER_NORESTART; +@@ -1624,12 +1624,12 @@ static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t) + struct uart_8250_port *p = em485->port; unsigned long flags; - spin_lock_irqsave(&p->port.lock, flags); + uart_port_lock_irqsave(&p->port, &flags); - if (dma->rx_running) - __dma_rx_complete(p); - -@@ -80,7 +80,7 @@ static void dma_rx_complete(void *param) - */ - if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR)) - p->dma->rx_dma(p); + if (em485->active_timer == &em485->start_tx_timer) { + __start_tx(&p->port); + em485->active_timer = NULL; + } - spin_unlock_irqrestore(&p->port.lock, flags); + uart_port_unlock_irqrestore(&p->port, flags); + + return HRTIMER_NORESTART; } +@@ -1912,7 +1912,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) + if (iir & UART_IIR_NO_INT) + return 0; - int serial8250_tx_dma(struct uart_8250_port *p) -diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c -index eaf4a9073..d12f230f1 100644 ---- a/drivers/tty/serial/8250/8250_dw.c -+++ b/drivers/tty/serial/8250/8250_dw.c -@@ -290,20 +290,20 @@ static int dw8250_handle_irq(struct uart_port *p) - * so we limit the workaround only to non-DMA mode. 
- */ - if (!up->dma && rx_timeout) { -- spin_lock_irqsave(&p->lock, flags); -+ uart_port_lock_irqsave(p, &flags); - status = serial_lsr_in(up); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - if (!(status & (UART_LSR_DR | UART_LSR_BI))) - (void) p->serial_in(p, UART_RX); + status = serial_lsr_in(up); -- spin_unlock_irqrestore(&p->lock, flags); -+ uart_port_unlock_irqrestore(p, flags); - } +@@ -1982,9 +1982,9 @@ static int serial8250_tx_threshold_handle_irq(struct uart_port *port) + if ((iir & UART_IIR_ID) == UART_IIR_THRI) { + struct uart_8250_port *up = up_to_u8250p(port); - /* Manually stop the Rx DMA transfer when acting as flow controller */ - if (quirks & DW_UART_QUIRK_IS_DMA_FC && up->dma && up->dma->rx_running && rx_timeout) { -- spin_lock_irqsave(&p->lock, flags); -+ uart_port_lock_irqsave(p, &flags); - status = serial_lsr_in(up); -- spin_unlock_irqrestore(&p->lock, flags); -+ uart_port_unlock_irqrestore(p, flags); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + serial8250_tx_chars(up); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } - if (status & (UART_LSR_DR | UART_LSR_BI)) { - dw8250_writel_ext(p, RZN1_UART_RDMACR, 0); -diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c -index 27430fdd9..17be6ad24 100644 ---- a/drivers/tty/serial/8250/8250_exar.c -+++ b/drivers/tty/serial/8250/8250_exar.c -@@ -243,9 +243,9 @@ static int xr17v35x_startup(struct uart_port *port) - * - * Synchronize UART_IER access against the console. - */ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - serial_port_out(port, UART_IER, 0); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); + iir = serial_port_in(port, UART_IIR); +@@ -1999,10 +1999,10 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) - return serial8250_do_startup(port); - } -diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c -index 6af4e1c12..f522eb502 100644 ---- a/drivers/tty/serial/8250/8250_fsl.c -+++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -30,11 +30,11 @@ int fsl8250_handle_irq(struct uart_port *port) - unsigned int iir; - struct uart_8250_port *up = up_to_u8250p(port); + serial8250_rpm_get(up); -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + if (!serial8250_tx_dma_running(up) && uart_lsr_tx_empty(serial_lsr_in(up))) + result = TIOCSER_TEMT; +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); - iir = port->serial_in(port, UART_IIR); - if (iir & UART_IIR_NO_INT) { -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); - return 0; - } + serial8250_rpm_put(up); -@@ -54,7 +54,7 @@ int fsl8250_handle_irq(struct uart_port *port) - if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) { - up->lsr_saved_flags &= ~UART_LSR_BI; - port->serial_in(port, UART_RX); -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); - return 1; +@@ -2064,13 +2064,13 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) + unsigned long flags; + + serial8250_rpm_get(up); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + if (break_state == -1) + up->lcr |= UART_LCR_SBC; + else + up->lcr &= ~UART_LCR_SBC; + serial_port_out(port, 
UART_LCR, up->lcr); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + serial8250_rpm_put(up); + } + +@@ -2206,7 +2206,7 @@ int serial8250_do_startup(struct uart_port *port) + * + * Synchronize UART_IER access against the console. + */ +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); +@@ -2216,7 +2216,7 @@ int serial8250_do_startup(struct uart_port *port) + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_LCR, 0); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); } -diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c -index 28f9a2679..33699e86e 100644 ---- a/drivers/tty/serial/8250/8250_mtk.c -+++ b/drivers/tty/serial/8250/8250_mtk.c -@@ -102,7 +102,7 @@ static void mtk8250_dma_rx_complete(void *param) - if (data->rx_status == DMA_RX_SHUTDOWN) - return; + if (port->type == PORT_DA830) { +@@ -2225,10 +2225,10 @@ int serial8250_do_startup(struct uart_port *port) + * + * Synchronize UART_IER access against the console. + */ +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + serial_port_out(port, UART_IER, 0); + serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + mdelay(10); -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); + /* Enable Tx, Rx and free run mode */ +@@ -2342,7 +2342,7 @@ int serial8250_do_startup(struct uart_port *port) + * + * Synchronize UART_IER access against the console. + */ +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); - total = dma->rx_size - state.residue; -@@ -128,7 +128,7 @@ static void mtk8250_dma_rx_complete(void *param) + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); +@@ -2354,7 +2354,7 @@ int serial8250_do_startup(struct uart_port *port) + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); - mtk8250_rx_dma(up); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); - } + if (port->irqflags & IRQF_SHARED) + enable_irq(port->irq); +@@ -2422,7 +2422,7 @@ int serial8250_do_startup(struct uart_port *port) + } - static void mtk8250_rx_dma(struct uart_8250_port *up) -@@ -372,7 +372,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, - * Ok, we're now changing the port state. Do it with - * interrupts disabled. + dont_test_tx_en: +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + + /* + * Clear the interrupt registers again for luck, and clear the +@@ -2493,17 +2493,17 @@ void serial8250_do_shutdown(struct uart_port *port) + * + * Synchronize UART_IER access against the console. */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); + up->ier = 0; + serial_port_out(port, UART_IER, 0); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); - /* - * Update the per-port timeout. 
-@@ -420,7 +420,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, - if (uart_console(port)) - up->port.cons->cflag = termios->c_cflag; + synchronize_irq(port->irq); + + if (up->dma) + serial8250_release_dma(up); + +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + if (port->flags & UPF_FOURPORT) { + /* reset interrupts on the AST Fourport board */ + inb((port->iobase & 0xfe0) | 0x1f); +@@ -2512,7 +2512,7 @@ void serial8250_do_shutdown(struct uart_port *port) + port->mctrl &= ~TIOCM_OUT2; + serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - /* Don't rewrite B0 */ - if (tty_termios_baud_rate(termios)) - tty_termios_encode_baud_rate(termios, baud, baud); -diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c -index 9ed62bc7c..e687cd0da 100644 ---- a/drivers/tty/serial/8250/8250_omap.c -+++ b/drivers/tty/serial/8250/8250_omap.c -@@ -405,7 +405,7 @@ static void omap_8250_set_termios(struct uart_port *port, - * interrupts disabled. - */ - pm_runtime_get_sync(port->dev); -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); /* - * Update the per-port timeout. -@@ -508,7 +508,7 @@ static void omap_8250_set_termios(struct uart_port *port, - } - omap8250_restore_regs(up); - -- spin_unlock_irq(&up->port.lock); -+ uart_port_unlock_irq(&up->port); - pm_runtime_mark_last_busy(port->dev); - pm_runtime_put_autosuspend(port->dev); + * Disable break condition and FIFOs +@@ -2756,14 +2756,14 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) + quot = serial8250_get_divisor(port, baud, &frac); -@@ -533,7 +533,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, - pm_runtime_get_sync(port->dev); + serial8250_rpm_get(up); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - /* Synchronize UART_IER access against the console. */ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); + uart_update_timeout(port, termios->c_cflag, baud); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - efr = serial_in(up, UART_EFR); -@@ -545,7 +545,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, - serial_out(up, UART_EFR, efr); - serial_out(up, UART_LCR, 0); + serial8250_set_divisor(port, baud, quot, frac); + serial_port_out(port, UART_LCR, up->lcr); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + serial8250_rpm_put(up); - pm_runtime_mark_last_busy(port->dev); - pm_runtime_put_autosuspend(port->dev); -@@ -676,7 +676,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) - unsigned long delay; + out_unlock: +@@ -2800,7 +2800,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + * Synchronize UART_IER access against the console. + */ + serial8250_rpm_get(up); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - /* Synchronize UART_IER access against the console. 
*/ -- spin_lock(&port->lock); -+ uart_port_lock(port); - up->ier = port->serial_in(port, UART_IER); - if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { - port->ops->stop_rx(port); -@@ -686,7 +686,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) - */ - cancel_delayed_work(&up->overrun_backoff); - } -- spin_unlock(&port->lock); -+ uart_port_unlock(port); + up->lcr = cval; /* Save computed LCR */ - delay = msecs_to_jiffies(up->overrun_backoff_time_ms); - schedule_delayed_work(&up->overrun_backoff, delay); -@@ -733,10 +733,10 @@ static int omap_8250_startup(struct uart_port *port) +@@ -2903,7 +2903,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ } + serial8250_set_mctrl(port, port->mctrl); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + serial8250_rpm_put(up); - /* Synchronize UART_IER access against the console. */ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_out(up, UART_IER, up->ier); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - - #ifdef CONFIG_PM - up->capabilities |= UART_CAP_RPM; -@@ -749,9 +749,9 @@ static int omap_8250_startup(struct uart_port *port) - serial_out(up, UART_OMAP_WER, priv->wer); - - if (up->dma && !(priv->habit & UART_HAS_EFR2)) { + /* Don't rewrite B0 */ +@@ -2926,15 +2926,15 @@ void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios) + { + if (termios->c_line == N_PPS) { + port->flags |= UPF_HARDPPS_CD; - spin_lock_irq(&port->lock); + uart_port_lock_irq(port); - up->dma->rx_dma(up); + serial8250_enable_ms(port); - spin_unlock_irq(&port->lock); + uart_port_unlock_irq(port); + } else { + port->flags &= ~UPF_HARDPPS_CD; + if (!UART_ENABLE_MS(port, termios->c_cflag)) { +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + serial8250_disable_ms(port); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + } } + } +@@ -3428,9 +3428,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + touch_nmi_watchdog(); - enable_irq(up->port.irq); -@@ -777,10 +777,10 @@ static void omap_8250_shutdown(struct uart_port *port) - serial_out(up, UART_OMAP_EFR2, 0x0); + if (oops_in_progress) +- locked = spin_trylock_irqsave(&port->lock, flags); ++ locked = uart_port_trylock_irqsave(port, &flags); + else +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - /* Synchronize UART_IER access against the console. 
*/ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - up->ier = 0; - serial_out(up, UART_IER, 0); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - disable_irq_nosync(up->port.irq); - dev_pm_clear_wake_irq(port->dev); + /* + * First save the IER then disable the interrupts +@@ -3500,7 +3500,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + serial8250_modem_status(up); -@@ -805,10 +805,10 @@ static void omap_8250_throttle(struct uart_port *port) + if (locked) +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + + static unsigned int probe_baud(struct uart_port *port) +-- +2.51.0 + +From 2d5ad50294d76d2d5b05a5b85ef629fa837a05f1 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:24 +0206 +Subject: [PATCH 033/213] serial: 8250_dma: Use port lock wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ilpo Järvinen +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-8-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_dma.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c +index 62492cf10bc9..fb85ef075ad6 100644 +--- a/drivers/tty/serial/8250/8250_dma.c ++++ b/drivers/tty/serial/8250/8250_dma.c +@@ -22,7 +22,7 @@ static void __dma_tx_complete(void *param) + dma_sync_single_for_cpu(dma->txchan->device->dev, dma->tx_addr, + UART_XMIT_SIZE, DMA_TO_DEVICE); + +- spin_lock_irqsave(&p->port.lock, flags); ++ uart_port_lock_irqsave(&p->port, &flags); + + dma->tx_running = 0; + +@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *param) + if (ret || !dma->tx_running) + serial8250_set_THRI(p); + +- spin_unlock_irqrestore(&p->port.lock, flags); ++ uart_port_unlock_irqrestore(&p->port, flags); + } + + static void __dma_rx_complete(struct uart_8250_port *p) +@@ -70,7 +70,7 @@ static void dma_rx_complete(void *param) + struct uart_8250_dma *dma = p->dma; + unsigned long flags; + +- spin_lock_irqsave(&p->port.lock, flags); ++ uart_port_lock_irqsave(&p->port, &flags); + if (dma->rx_running) + __dma_rx_complete(p); + +@@ -80,7 +80,7 @@ static void dma_rx_complete(void *param) + */ + if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR)) + p->dma->rx_dma(p); +- spin_unlock_irqrestore(&p->port.lock, flags); ++ uart_port_unlock_irqrestore(&p->port, flags); + } + + int serial8250_tx_dma(struct uart_8250_port *p) +-- +2.51.0 + +From c4f2be57dee4fb8eefb57e225ef781e7f33b61dd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:25 +0206 +Subject: [PATCH 034/213] serial: 8250_dw: Use port lock wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Ilpo Järvinen +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-9-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_dw.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c +index eaf4a907380a..d12f230f1605 100644 +--- a/drivers/tty/serial/8250/8250_dw.c ++++ b/drivers/tty/serial/8250/8250_dw.c +@@ -290,20 +290,20 @@ static int dw8250_handle_irq(struct uart_port *p) + * so we limit the workaround only to non-DMA mode. + */ + if (!up->dma && rx_timeout) { +- spin_lock_irqsave(&p->lock, flags); ++ uart_port_lock_irqsave(p, &flags); + status = serial_lsr_in(up); + + if (!(status & (UART_LSR_DR | UART_LSR_BI))) + (void) p->serial_in(p, UART_RX); + +- spin_unlock_irqrestore(&p->lock, flags); ++ uart_port_unlock_irqrestore(p, flags); + } + + /* Manually stop the Rx DMA transfer when acting as flow controller */ + if (quirks & DW_UART_QUIRK_IS_DMA_FC && up->dma && up->dma->rx_running && rx_timeout) { +- spin_lock_irqsave(&p->lock, flags); ++ uart_port_lock_irqsave(p, &flags); + status = serial_lsr_in(up); +- spin_unlock_irqrestore(&p->lock, flags); ++ uart_port_unlock_irqrestore(p, flags); + + if (status & (UART_LSR_DR | UART_LSR_BI)) { + dw8250_writel_ext(p, RZN1_UART_RDMACR, 0); +-- +2.51.0 + +From 4c04158b7437c6b8ade8771f975e64dc709fc1fd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:26 +0206 +Subject: [PATCH 035/213] serial: 8250_exar: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-10-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_exar.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c +index 27430fdd9e76..17be6ad24a0f 100644 +--- a/drivers/tty/serial/8250/8250_exar.c ++++ b/drivers/tty/serial/8250/8250_exar.c +@@ -243,9 +243,9 @@ static int xr17v35x_startup(struct uart_port *port) + * + * Synchronize UART_IER access against the console. 
+ */ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + serial_port_out(port, UART_IER, 0); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + + return serial8250_do_startup(port); + } +-- +2.51.0 + +From 15185536d956e39b2daa2ea74e08da1911616c91 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:27 +0206 +Subject: [PATCH 036/213] serial: 8250_fsl: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-11-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_fsl.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c +index 6af4e1c1210a..f522eb5026c9 100644 +--- a/drivers/tty/serial/8250/8250_fsl.c ++++ b/drivers/tty/serial/8250/8250_fsl.c +@@ -30,11 +30,11 @@ int fsl8250_handle_irq(struct uart_port *port) + unsigned int iir; + struct uart_8250_port *up = up_to_u8250p(port); + +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); + + iir = port->serial_in(port, UART_IIR); + if (iir & UART_IIR_NO_INT) { +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); + return 0; + } + +@@ -54,7 +54,7 @@ int fsl8250_handle_irq(struct uart_port *port) + if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) { + up->lsr_saved_flags &= ~UART_LSR_BI; + port->serial_in(port, UART_RX); +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); + return 1; + } + +-- +2.51.0 + +From 670d893b63507930d88d5680f1bbba6dc1b65d3b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:28 +0206 +Subject: [PATCH 037/213] serial: 8250_mtk: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Chen-Yu Tsai +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-12-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_mtk.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c +index 28f9a2679a20..33699e86eb52 100644 +--- a/drivers/tty/serial/8250/8250_mtk.c ++++ b/drivers/tty/serial/8250/8250_mtk.c +@@ -102,7 +102,7 @@ static void mtk8250_dma_rx_complete(void *param) + if (data->rx_status == DMA_RX_SHUTDOWN) + return; + +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); + + dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); + total = dma->rx_size - state.residue; +@@ -128,7 +128,7 @@ static void mtk8250_dma_rx_complete(void *param) + + mtk8250_rx_dma(up); + +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); + } + + static void mtk8250_rx_dma(struct uart_8250_port *up) +@@ -372,7 +372,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, + * Ok, we're now changing the port state. Do it with + * interrupts disabled. + */ +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + + /* + * Update the per-port timeout. +@@ -420,7 +420,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, + if (uart_console(port)) + up->port.cons->cflag = termios->c_cflag; + +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + /* Don't rewrite B0 */ + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); +-- +2.51.0 + +From 4572eed7e7b57a36b14b1af7f7234464060b83ad Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:29 +0206 +Subject: [PATCH 038/213] serial: 8250_omap: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. 
It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-13-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_omap.c | 52 ++++++++++++++--------------- + 1 file changed, 26 insertions(+), 26 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 9ed62bc7cdd8..e687cd0daa76 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -405,7 +405,7 @@ static void omap_8250_set_termios(struct uart_port *port, + * interrupts disabled. + */ + pm_runtime_get_sync(port->dev); +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + + /* + * Update the per-port timeout. +@@ -508,7 +508,7 @@ static void omap_8250_set_termios(struct uart_port *port, + } + omap8250_restore_regs(up); + +- spin_unlock_irq(&up->port.lock); ++ uart_port_unlock_irq(&up->port); + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); + +@@ -533,7 +533,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, + pm_runtime_get_sync(port->dev); + + /* Synchronize UART_IER access against the console. */ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + efr = serial_in(up, UART_EFR); +@@ -545,7 +545,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, + serial_out(up, UART_EFR, efr); + serial_out(up, UART_LCR, 0); + +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); +@@ -676,7 +676,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + unsigned long delay; + + /* Synchronize UART_IER access against the console. */ +- spin_lock(&port->lock); ++ uart_port_lock(port); + up->ier = port->serial_in(port, UART_IER); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); +@@ -686,7 +686,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + */ + cancel_delayed_work(&up->overrun_backoff); + } +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + + delay = msecs_to_jiffies(up->overrun_backoff_time_ms); + schedule_delayed_work(&up->overrun_backoff, delay); +@@ -733,10 +733,10 @@ static int omap_8250_startup(struct uart_port *port) + } + + /* Synchronize UART_IER access against the console. 
*/ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + up->ier = UART_IER_RLSI | UART_IER_RDI; + serial_out(up, UART_IER, up->ier); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + + #ifdef CONFIG_PM + up->capabilities |= UART_CAP_RPM; +@@ -749,9 +749,9 @@ static int omap_8250_startup(struct uart_port *port) + serial_out(up, UART_OMAP_WER, priv->wer); + + if (up->dma && !(priv->habit & UART_HAS_EFR2)) { +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + up->dma->rx_dma(up); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + } + + enable_irq(up->port.irq); +@@ -777,10 +777,10 @@ static void omap_8250_shutdown(struct uart_port *port) + serial_out(up, UART_OMAP_EFR2, 0x0); + + /* Synchronize UART_IER access against the console. */ +- spin_lock_irq(&port->lock); ++ uart_port_lock_irq(port); + up->ier = 0; + serial_out(up, UART_IER, 0); +- spin_unlock_irq(&port->lock); ++ uart_port_unlock_irq(port); + disable_irq_nosync(up->port.irq); + dev_pm_clear_wake_irq(port->dev); + +@@ -805,10 +805,10 @@ static void omap_8250_throttle(struct uart_port *port) pm_runtime_get_sync(port->dev); @@ -1978,8 +3915,49 @@ index 9ed62bc7c..e687cd0da 100644 } priv->latency = priv->calc_latency; +-- +2.51.0 + +From 20b3c658acda92bf0c0410f7f0135348235d42c6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:30 +0206 +Subject: [PATCH 039/213] serial: 8250_pci1xxxx: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
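+
+The shape of the conversion is identical in every driver. On a
+hypothetical interrupt handler (named demo_uart_isr here purely for
+illustration, not taken from this patch) it reads:
+
+    static irqreturn_t demo_uart_isr(int irq, void *dev_id)
+    {
+        struct uart_port *port = dev_id;
+        unsigned long flags;
+
+        /* was: spin_lock_irqsave(&port->lock, flags); */
+        uart_port_lock_irqsave(port, &flags);
+
+        /* ... service RX/TX under the port lock ... */
+
+        /* was: spin_unlock_irqrestore(&port->lock, flags); */
+        uart_port_unlock_irqrestore(port, flags);
+
+        return IRQ_HANDLED;
+    }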
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-14-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_pci1xxxx.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c -index a3b25779d..53e238c8c 100644 +index a3b25779d921..53e238c8cc89 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -225,10 +225,10 @@ static bool pci1xxxx_port_suspend(int line) @@ -2008,388 +3986,369 @@ index a3b25779d..53e238c8c 100644 } mutex_unlock(&tport->mutex); } -diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c -index c2778300e..d40df0475 100644 ---- a/drivers/tty/serial/8250/8250_port.c -+++ b/drivers/tty/serial/8250/8250_port.c -@@ -557,6 +557,11 @@ static int serial8250_em485_init(struct uart_8250_port *p) - if (!p->em485) - return -ENOMEM; - -+#ifndef CONFIG_SERIAL_8250_LEGACY_CONSOLE -+ if (uart_console(&p->port)) -+ dev_warn(p->port.dev, "no atomic printing for rs485 consoles\n"); -+#endif -+ - hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, -@@ -689,7 +694,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) - - if (p->capabilities & UART_CAP_SLEEP) { - /* Synchronize UART_IER access against the console. */ -- spin_lock_irq(&p->port.lock); -+ uart_port_lock_irq(&p->port); - if (p->capabilities & UART_CAP_EFR) { - lcr = serial_in(p, UART_LCR); - efr = serial_in(p, UART_EFR); -@@ -703,13 +708,17 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) - serial_out(p, UART_EFR, efr); - serial_out(p, UART_LCR, lcr); - } -- spin_unlock_irq(&p->port.lock); -+ uart_port_unlock_irq(&p->port); - } - - serial8250_rpm_put(p); - } - --static void serial8250_clear_IER(struct uart_8250_port *up) -+/* -+ * Only to be used by write_atomic() and the legacy write(), which do not -+ * require port lock. -+ */ -+static void __serial8250_clear_IER(struct uart_8250_port *up) - { - if (up->capabilities & UART_CAP_UUE) - serial_out(up, UART_IER, UART_IER_UUE); -@@ -717,6 +726,14 @@ static void serial8250_clear_IER(struct uart_8250_port *up) - serial_out(up, UART_IER, 0); - } +-- +2.51.0 + +From a00cd1747c8e85f091199eb79cf1c3b75dccf370 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:31 +0206 +Subject: [PATCH 040/213] serial: altera_jtaguart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. 
+ +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-15-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/altera_jtaguart.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c +index 5fab4c978891..7090b251dd4d 100644 +--- a/drivers/tty/serial/altera_jtaguart.c ++++ b/drivers/tty/serial/altera_jtaguart.c +@@ -147,14 +147,14 @@ static irqreturn_t altera_jtaguart_interrupt(int irq, void *data) + isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >> + ALTERA_JTAGUART_CONTROL_RI_OFF) & port->read_status_mask; -+static inline void serial8250_clear_IER(struct uart_8250_port *up) -+{ -+ /* Port locked to synchronize UART_IER access against the console. */ -+ lockdep_assert_held_once(&up->port.lock); -+ -+ __serial8250_clear_IER(up); -+} -+ - #ifdef CONFIG_SERIAL_8250_RSA - /* - * Attempts to turn on the RSA FIFO. Returns zero on failure. -@@ -746,9 +763,9 @@ static void enable_rsa(struct uart_8250_port *up) - { - if (up->port.type == PORT_RSA) { - if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { -- spin_lock_irq(&up->port.lock); -+ uart_port_lock_irq(&up->port); - __enable_rsa(up); -- spin_unlock_irq(&up->port.lock); -+ uart_port_unlock_irq(&up->port); - } - if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) - serial_out(up, UART_RSA_FRR, 0); -@@ -768,7 +785,7 @@ static void disable_rsa(struct uart_8250_port *up) +- spin_lock(&port->lock); ++ uart_port_lock(port); - if (up->port.type == PORT_RSA && - up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { -- spin_lock_irq(&up->port.lock); -+ uart_port_lock_irq(&up->port); + if (isr & ALTERA_JTAGUART_CONTROL_RE_MSK) + altera_jtaguart_rx_chars(port); + if (isr & ALTERA_JTAGUART_CONTROL_WE_MSK) + altera_jtaguart_tx_chars(port); - mode = serial_in(up, UART_RSA_MSR); - result = !(mode & UART_RSA_MSR_FIFO); -@@ -781,7 +798,7 @@ static void disable_rsa(struct uart_8250_port *up) +- spin_unlock(&port->lock); ++ uart_port_unlock(port); - if (result) - up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; -- spin_unlock_irq(&up->port.lock); -+ uart_port_unlock_irq(&up->port); - } + return IRQ_RETVAL(isr); } - #endif /* CONFIG_SERIAL_8250_RSA */ -@@ -1172,7 +1189,7 @@ static void autoconfig(struct uart_8250_port *up) - * - * Synchronize UART_IER access against the console. 
- */ +@@ -180,14 +180,14 @@ static int altera_jtaguart_startup(struct uart_port *port) + return ret; + } + - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - up->capabilities = 0; - up->bugs = 0; -@@ -1211,7 +1228,7 @@ static void autoconfig(struct uart_8250_port *up) - /* - * We failed; there's nothing here - */ -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", - scratch2, scratch3); - goto out; -@@ -1235,7 +1252,7 @@ static void autoconfig(struct uart_8250_port *up) - status1 = serial_in(up, UART_MSR) & UART_MSR_STATUS_BITS; - serial8250_out_MCR(up, save_mcr); - if (status1 != (UART_MSR_DCD | UART_MSR_CTS)) { -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - DEBUG_AUTOCONF("LOOP test failed (%02x) ", - status1); - goto out; -@@ -1304,7 +1321,7 @@ static void autoconfig(struct uart_8250_port *up) - serial8250_clear_IER(up); + /* Enable RX interrupts now */ + port->read_status_mask = ALTERA_JTAGUART_CONTROL_RE_MSK; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); - out_unlock: - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - /* - * Check if the device is a Fintek F81216A -@@ -1341,9 +1358,9 @@ static void autoconfig_irq(struct uart_8250_port *up) - probe_irq_off(probe_irq_on()); - save_mcr = serial8250_in_MCR(up); - /* Synchronize UART_IER access against the console. */ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - save_ier = serial_in(up, UART_IER); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - serial8250_out_MCR(up, UART_MCR_OUT1 | UART_MCR_OUT2); - - irqs = probe_irq_on(); -@@ -1356,9 +1373,9 @@ static void autoconfig_irq(struct uart_8250_port *up) - UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); - } - /* Synchronize UART_IER access against the console. */ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - serial_out(up, UART_IER, UART_IER_ALL_INTR); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - serial_in(up, UART_LSR); - serial_in(up, UART_RX); - serial_in(up, UART_IIR); -@@ -1369,9 +1386,9 @@ static void autoconfig_irq(struct uart_8250_port *up) - - serial8250_out_MCR(up, save_mcr); - /* Synchronize UART_IER access against the console. 
*/ -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - serial_out(up, UART_IER, save_ier); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - - if (port->flags & UPF_FOURPORT) - outb_p(save_ICP, ICP); -@@ -1436,13 +1453,13 @@ static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) + return 0; + } +@@ -196,14 +196,14 @@ static void altera_jtaguart_shutdown(struct uart_port *port) + { unsigned long flags; - serial8250_rpm_get(p); -- spin_lock_irqsave(&p->port.lock, flags); -+ uart_port_lock_irqsave(&p->port, &flags); - if (em485->active_timer == &em485->stop_tx_timer) { - p->rs485_stop_tx(p); - em485->active_timer = NULL; - em485->tx_stopped = true; - } -- spin_unlock_irqrestore(&p->port.lock, flags); -+ uart_port_unlock_irqrestore(&p->port, flags); - serial8250_rpm_put(p); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - return HRTIMER_NORESTART; -@@ -1624,12 +1641,12 @@ static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t) - struct uart_8250_port *p = em485->port; - unsigned long flags; + /* Disable all interrupts now */ + port->read_status_mask = 0; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); -- spin_lock_irqsave(&p->port.lock, flags); -+ uart_port_lock_irqsave(&p->port, &flags); - if (em485->active_timer == &em485->start_tx_timer) { - __start_tx(&p->port); - em485->active_timer = NULL; - } -- spin_unlock_irqrestore(&p->port.lock, flags); -+ uart_port_unlock_irqrestore(&p->port, flags); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); - return HRTIMER_NORESTART; + free_irq(port->irq, port); } -@@ -1912,7 +1929,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) - if (iir & UART_IIR_NO_INT) - return 0; +@@ -264,33 +264,33 @@ static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c + unsigned long flags; + u32 status; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); + while (!altera_jtaguart_tx_space(port, &status)) { +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); - status = serial_lsr_in(up); - -@@ -1982,9 +1999,9 @@ static int serial8250_tx_threshold_handle_irq(struct uart_port *port) - if ((iir & UART_IIR_ID) == UART_IIR_THRI) { - struct uart_8250_port *up = up_to_u8250p(port); + if ((status & ALTERA_JTAGUART_CONTROL_AC_MSK) == 0) { + return; /* no connection activity */ + } + cpu_relax(); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - serial8250_tx_chars(up); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); } - - iir = serial_port_in(port, UART_IIR); -@@ -1999,10 +2016,10 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) - - serial8250_rpm_get(up); + writel(c, port->membase + ALTERA_JTAGUART_DATA_REG); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } + #else + static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c) + { + unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - if (!serial8250_tx_dma_running(up) && uart_lsr_tx_empty(serial_lsr_in(up))) - result = TIOCSER_TEMT; + while (!altera_jtaguart_tx_space(port, NULL)) { +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + cpu_relax(); +- spin_lock_irqsave(&port->lock, 
flags); ++ uart_port_lock_irqsave(port, &flags); + } + writel(c, port->membase + ALTERA_JTAGUART_DATA_REG); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); + } + #endif - serial8250_rpm_put(up); - -@@ -2064,13 +2081,13 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) +-- +2.51.0 + +From 91bc4661edebc763d224ac5ece2f7f179580f42b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:32 +0206 +Subject: [PATCH 041/213] serial: altera_uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-16-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/altera_uart.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c +index a9c41942190c..77835ac68df2 100644 +--- a/drivers/tty/serial/altera_uart.c ++++ b/drivers/tty/serial/altera_uart.c +@@ -164,13 +164,13 @@ static void altera_uart_break_ctl(struct uart_port *port, int break_state) + struct altera_uart *pp = container_of(port, struct altera_uart, port); unsigned long flags; - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if (break_state == -1) - up->lcr |= UART_LCR_SBC; + pp->imr |= ALTERA_UART_CONTROL_TRBK_MSK; else - up->lcr &= ~UART_LCR_SBC; - serial_port_out(port, UART_LCR, up->lcr); + pp->imr &= ~ALTERA_UART_CONTROL_TRBK_MSK; + altera_uart_update_ctrl_reg(pp); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - serial8250_rpm_put(up); } -@@ -2206,7 +2223,7 @@ int serial8250_do_startup(struct uart_port *port) - * - * Synchronize UART_IER access against the console. 
- */ -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - up->acr = 0; - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); -@@ -2216,7 +2233,7 @@ int serial8250_do_startup(struct uart_port *port) - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_LCR, 0); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - if (port->type == PORT_DA830) { -@@ -2225,10 +2242,10 @@ int serial8250_do_startup(struct uart_port *port) - * - * Synchronize UART_IER access against the console. - */ -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - serial_port_out(port, UART_IER, 0); - serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - mdelay(10); - - /* Enable Tx, Rx and free run mode */ -@@ -2342,7 +2359,7 @@ int serial8250_do_startup(struct uart_port *port) - * - * Synchronize UART_IER access against the console. - */ -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); -@@ -2354,7 +2371,7 @@ int serial8250_do_startup(struct uart_port *port) - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - - if (port->irqflags & IRQF_SHARED) - enable_irq(port->irq); -@@ -2377,7 +2394,7 @@ int serial8250_do_startup(struct uart_port *port) - */ - serial_port_out(port, UART_LCR, UART_LCR_WLEN8); + static void altera_uart_set_termios(struct uart_port *port, +@@ -187,10 +187,10 @@ static void altera_uart_set_termios(struct uart_port *port, + tty_termios_copy_hw(termios, old); + tty_termios_encode_baud_rate(termios, baud, baud); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - if (up->port.flags & UPF_FOURPORT) { - if (!up->port.irq) - up->port.mctrl |= TIOCM_OUT1; -@@ -2423,7 +2440,7 @@ int serial8250_do_startup(struct uart_port *port) - } - - dont_test_tx_en: + uart_update_timeout(port, termios->c_cflag, baud); + altera_uart_writel(port, baudclk, ALTERA_UART_DIVISOR_REG); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); /* - * Clear the interrupt registers again for luck, and clear the -@@ -2494,17 +2511,17 @@ void serial8250_do_shutdown(struct uart_port *port) - * - * Synchronize UART_IER access against the console. 
- */ + * FIXME: port->read_status_mask and port->ignore_status_mask +@@ -264,12 +264,12 @@ static irqreturn_t altera_uart_interrupt(int irq, void *data) + + isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr; + - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - up->ier = 0; - serial_port_out(port, UART_IER, 0); + if (isr & ALTERA_UART_STATUS_RRDY_MSK) + altera_uart_rx_chars(port); + if (isr & ALTERA_UART_STATUS_TRDY_MSK) + altera_uart_tx_chars(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - synchronize_irq(port->irq); - - if (up->dma) - serial8250_release_dma(up); + return IRQ_RETVAL(isr); + } +@@ -313,13 +313,13 @@ static int altera_uart_startup(struct uart_port *port) + } + } - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - if (port->flags & UPF_FOURPORT) { - /* reset interrupts on the AST Fourport board */ - inb((port->iobase & 0xfe0) | 0x1f); -@@ -2513,7 +2530,7 @@ void serial8250_do_shutdown(struct uart_port *port) - port->mctrl &= ~TIOCM_OUT2; - serial8250_set_mctrl(port, port->mctrl); + /* Enable RX interrupts now */ + pp->imr = ALTERA_UART_CONTROL_RRDY_MSK; + altera_uart_update_ctrl_reg(pp); + - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - /* - * Disable break condition and FIFOs -@@ -2757,14 +2774,14 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) - quot = serial8250_get_divisor(port, baud, &frac); + return 0; + } +@@ -329,13 +329,13 @@ static void altera_uart_shutdown(struct uart_port *port) + struct altera_uart *pp = container_of(port, struct altera_uart, port); + unsigned long flags; - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - uart_update_timeout(port, termios->c_cflag, baud); + /* Disable all interrupts now */ + pp->imr = 0; + altera_uart_update_ctrl_reg(pp); - serial8250_set_divisor(port, baud, quot, frac); - serial_port_out(port, UART_LCR, up->lcr); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + + if (port->irq) + free_irq(port->irq, port); +-- +2.51.0 + +From c34a67ceaebb75f7c1ec395d078207e8257cd9c7 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:33 +0206 +Subject: [PATCH 042/213] serial: amba-pl010: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. 
In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-17-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/amba-pl010.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c +index b5a7404cbacb..eabbf8afc9b5 100644 +--- a/drivers/tty/serial/amba-pl010.c ++++ b/drivers/tty/serial/amba-pl010.c +@@ -207,7 +207,7 @@ static irqreturn_t pl010_int(int irq, void *dev_id) + unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT; + int handled = 0; + +- spin_lock(&port->lock); ++ uart_port_lock(port); + + status = readb(port->membase + UART010_IIR); + if (status) { +@@ -228,7 +228,7 @@ static irqreturn_t pl010_int(int irq, void *dev_id) + handled = 1; + } + +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + + return IRQ_RETVAL(handled); + } +@@ -270,14 +270,14 @@ static void pl010_break_ctl(struct uart_port *port, int break_state) + unsigned long flags; + unsigned int lcr_h; +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + lcr_h = readb(port->membase + UART010_LCRH); + if (break_state == -1) + lcr_h |= UART01x_LCRH_BRK; + else + lcr_h &= ~UART01x_LCRH_BRK; + writel(lcr_h, port->membase + UART010_LCRH); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - serial8250_rpm_put(up); + } + + static int pl010_startup(struct uart_port *port) +@@ -385,7 +385,7 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, + if (port->fifosize > 1) + lcr_h |= UART01x_LCRH_FEN; - out_unlock: -@@ -2801,7 +2818,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, - * Synchronize UART_IER access against the console. - */ - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - up->lcr = cval; /* Save computed LCR */ + /* + * Update the per-port timeout. 
+@@ -438,22 +438,22 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, + writel(lcr_h, port->membase + UART010_LCRH); + writel(old_cr, port->membase + UART010_CR); -@@ -2904,7 +2921,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, - serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ - } - serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - serial8250_rpm_put(up); + } - /* Don't rewrite B0 */ -@@ -2927,15 +2944,15 @@ void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios) + static void pl010_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (termios->c_line == N_PPS) { port->flags |= UPF_HARDPPS_CD; - spin_lock_irq(&port->lock); + uart_port_lock_irq(port); - serial8250_enable_ms(port); + pl010_enable_ms(port); - spin_unlock_irq(&port->lock); + uart_port_unlock_irq(port); } else { @@ -2397,447 +4356,455 @@ index c2778300e..d40df0475 100644 if (!UART_ENABLE_MS(port, termios->c_cflag)) { - spin_lock_irq(&port->lock); + uart_port_lock_irq(port); - serial8250_disable_ms(port); + pl010_disable_ms(port); - spin_unlock_irq(&port->lock); + uart_port_unlock_irq(port); } } } -@@ -3332,6 +3349,11 @@ static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) +-- +2.51.0 + +From f4f03403afb0620eaa4e99e0e8fe5218cebc56bf Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:35 +0206 +Subject: [PATCH 043/213] serial: apb: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
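+
+Assuming the same one-to-one naming used throughout this series, every
+locking variant keeps its semantics and only changes spelling:
+
+    spin_lock(&port->lock)                 ->  uart_port_lock(port)
+    spin_unlock(&port->lock)               ->  uart_port_unlock(port)
+    spin_lock_irq(&port->lock)             ->  uart_port_lock_irq(port)
+    spin_unlock_irq(&port->lock)           ->  uart_port_unlock_irq(port)
+    spin_lock_irqsave(&port->lock, flags)  ->  uart_port_lock_irqsave(port, &flags)
+    spin_unlock_irqrestore(&port->lock, flags)
+                                           ->  uart_port_unlock_irqrestore(port, flags)
+    spin_trylock(&port->lock)              ->  uart_port_trylock(port)
+
+Call sites pick the variant their context requires, exactly as before:
+plain lock where the relevant interrupts are already excluded (e.g. in
+interrupt handlers), _irq and _irqsave where they must be disabled
+explicitly.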
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-19-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/apbuart.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c +index d3cb341f2c55..364599f256db 100644 +--- a/drivers/tty/serial/apbuart.c ++++ b/drivers/tty/serial/apbuart.c +@@ -133,7 +133,7 @@ static irqreturn_t apbuart_int(int irq, void *dev_id) + struct uart_port *port = dev_id; + unsigned int status; - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out(port, UART_TX, ch); -+ -+ if (ch == '\n') -+ up->console_newline_needed = false; -+ else -+ up->console_newline_needed = true; +- spin_lock(&port->lock); ++ uart_port_lock(port); + + status = UART_GET_STATUS(port); + if (status & UART_STATUS_DR) +@@ -141,7 +141,7 @@ static irqreturn_t apbuart_int(int irq, void *dev_id) + if (status & UART_STATUS_THE) + apbuart_tx_chars(port); + +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + + return IRQ_HANDLED; } +@@ -228,7 +228,7 @@ static void apbuart_set_termios(struct uart_port *port, + if (termios->c_cflag & CRTSCTS) + cr |= UART_CTRL_FL; - /* -@@ -3360,6 +3382,7 @@ static void serial8250_console_restore(struct uart_8250_port *up) - serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + + /* Update the per-port timeout. */ + uart_update_timeout(port, termios->c_cflag, baud); +@@ -251,7 +251,7 @@ static void apbuart_set_termios(struct uart_port *port, + UART_PUT_SCAL(port, quot); + UART_PUT_CTRL(port, cr); + +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); } -+#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE - static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) - { - unsigned int i; -@@ -3429,15 +3452,15 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, - touch_nmi_watchdog(); + static const char *apbuart_type(struct uart_port *port) +-- +2.51.0 + +From 0c29c7e21f2b3e4a17ba3893673b770e13c93b33 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:36 +0206 +Subject: [PATCH 044/213] serial: ar933x: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. 
+ +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-20-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/ar933x_uart.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c +index 924c1a89347c..ffd234673177 100644 +--- a/drivers/tty/serial/ar933x_uart.c ++++ b/drivers/tty/serial/ar933x_uart.c +@@ -133,9 +133,9 @@ static unsigned int ar933x_uart_tx_empty(struct uart_port *port) + unsigned long flags; + unsigned int rdata; - if (oops_in_progress) -- locked = spin_trylock_irqsave(&port->lock, flags); -+ locked = uart_port_trylock_irqsave(port, &flags); +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); + rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG); +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); + + return (rdata & AR933X_UART_DATA_TX_CSR) ? 0 : TIOCSER_TEMT; + } +@@ -220,14 +220,14 @@ static void ar933x_uart_break_ctl(struct uart_port *port, int break_state) + container_of(port, struct ar933x_uart_port, port); + unsigned long flags; + +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); + if (break_state == -1) + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_BREAK); else -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); + ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_BREAK); +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); + } - /* - * First save the IER then disable the interrupts + /* +@@ -318,7 +318,7 @@ static void ar933x_uart_set_termios(struct uart_port *port, + * Ok, we're now changing the port state. Do it with + * interrupts disabled. */ - ier = serial_port_in(port, UART_IER); -- serial8250_clear_IER(up); -+ __serial8250_clear_IER(up); +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); - /* check scratch reg to see if port powered off during system sleep */ - if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3501,8 +3524,137 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, - serial8250_modem_status(up); + /* disable the UART */ + ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG, +@@ -352,7 +352,7 @@ static void ar933x_uart_set_termios(struct uart_port *port, + AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, + AR933X_UART_CS_IF_MODE_DCE << AR933X_UART_CS_IF_MODE_S); - if (locked) -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } -+#else -+bool serial8250_console_write_thread(struct uart_8250_port *up, -+ struct nbcon_write_context *wctxt) -+{ -+ struct uart_8250_em485 *em485 = up->em485; -+ struct uart_port *port = &up->port; -+ bool done = false; -+ unsigned int ier; -+ -+ touch_nmi_watchdog(); -+ -+ if (!nbcon_enter_unsafe(wctxt)) -+ return false; -+ -+ /* First save IER then disable the interrupts. */ -+ ier = serial_port_in(port, UART_IER); -+ serial8250_clear_IER(up); -+ -+ /* Check scratch reg if port powered off during system sleep. 
*/ -+ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -+ serial8250_console_restore(up); -+ up->canary = 0; -+ } -+ -+ if (em485) { -+ if (em485->tx_stopped) -+ up->rs485_start_tx(up); -+ mdelay(port->rs485.delay_rts_before_send); -+ } -+ -+ if (nbcon_exit_unsafe(wctxt)) { -+ int len = READ_ONCE(wctxt->len); -+ int i; -+ -+ /* -+ * Write out the message. Toggle unsafe for each byte in order -+ * to give another (higher priority) context the opportunity -+ * for a friendly takeover. If such a takeover occurs, this -+ * context must reacquire ownership in order to perform final -+ * actions (such as re-enabling the interrupts). -+ * -+ * IMPORTANT: wctxt->outbuf and wctxt->len are no longer valid -+ * after a reacquire so writing the message must be -+ * aborted. -+ */ -+ for (i = 0; i < len; i++) { -+ if (!nbcon_enter_unsafe(wctxt)) { -+ nbcon_reacquire(wctxt); -+ break; -+ } -+ -+ uart_console_write(port, wctxt->outbuf + i, 1, serial8250_console_putchar); -+ -+ if (!nbcon_exit_unsafe(wctxt)) { -+ nbcon_reacquire(wctxt); -+ break; -+ } -+ } -+ done = (i == len); -+ } else { -+ nbcon_reacquire(wctxt); -+ } -+ -+ while (!nbcon_enter_unsafe(wctxt)) -+ nbcon_reacquire(wctxt); -+ -+ /* Finally, wait for transmitter to become empty and restore IER. */ -+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); -+ if (em485) { -+ mdelay(port->rs485.delay_rts_after_send); -+ if (em485->tx_stopped) -+ up->rs485_stop_tx(up); -+ } -+ serial_port_out(port, UART_IER, ier); -+ -+ /* -+ * The receive handling will happen properly because the receive ready -+ * bit will still be set; it is not cleared on read. However, modem -+ * control will not, we must call it if we have saved something in the -+ * saved flags while processing with interrupts off. -+ */ -+ if (up->msr_saved_flags) -+ serial8250_modem_status(up); -+ -+ /* Success if no handover/takeover and message fully printed. */ -+ return (nbcon_exit_unsafe(wctxt) && done); -+} -+ -+bool serial8250_console_write_atomic(struct uart_8250_port *up, -+ struct nbcon_write_context *wctxt) -+{ -+ struct uart_port *port = &up->port; -+ unsigned int ier; -+ -+ /* Atomic console not supported for rs485 mode. */ -+ if (up->em485) -+ return false; -+ -+ touch_nmi_watchdog(); -+ -+ if (!nbcon_enter_unsafe(wctxt)) -+ return false; -+ -+ /* -+ * First save IER then disable the interrupts. The special variant to -+ * clear IER is used because atomic printing may occur without holding -+ * the port lock. -+ */ -+ ier = serial_port_in(port, UART_IER); -+ __serial8250_clear_IER(up); -+ -+ /* Check scratch reg if port powered off during system sleep. */ -+ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -+ serial8250_console_restore(up); -+ up->canary = 0; -+ } -+ -+ if (up->console_newline_needed) -+ uart_console_write(port, "\n", 1, serial8250_console_putchar); -+ uart_console_write(port, wctxt->outbuf, wctxt->len, serial8250_console_putchar); -+ -+ /* Finally, wait for transmitter to become empty and restore IER. */ -+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); -+ serial_port_out(port, UART_IER, ier); -+ -+ /* Success if no handover/takeover. 
*/ -+ return nbcon_exit_unsafe(wctxt); -+} -+#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */ - - static unsigned int probe_baud(struct uart_port *port) - { -@@ -3521,6 +3673,7 @@ static unsigned int probe_baud(struct uart_port *port) - - int serial8250_console_setup(struct uart_port *port, char *options, bool probe) - { -+ struct uart_8250_port *up = up_to_u8250p(port); - int baud = 9600; - int bits = 8; - int parity = 'n'; -@@ -3530,6 +3683,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) - if (!port->iobase && !port->membase) - return -ENODEV; +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); -+ up->console_newline_needed = false; -+ - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - else if (probe) -diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c -index 5fab4c978..7090b251d 100644 ---- a/drivers/tty/serial/altera_jtaguart.c -+++ b/drivers/tty/serial/altera_jtaguart.c -@@ -147,14 +147,14 @@ static irqreturn_t altera_jtaguart_interrupt(int irq, void *data) - isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >> - ALTERA_JTAGUART_CONTROL_RI_OFF) & port->read_status_mask; + if (tty_termios_baud_rate(new)) + tty_termios_encode_baud_rate(new, baud, baud); +@@ -450,7 +450,7 @@ static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id) + if ((status & AR933X_UART_CS_HOST_INT) == 0) + return IRQ_NONE; -- spin_lock(&port->lock); -+ uart_port_lock(port); +- spin_lock(&up->port.lock); ++ uart_port_lock(&up->port); - if (isr & ALTERA_JTAGUART_CONTROL_RE_MSK) - altera_jtaguart_rx_chars(port); - if (isr & ALTERA_JTAGUART_CONTROL_WE_MSK) - altera_jtaguart_tx_chars(port); + status = ar933x_uart_read(up, AR933X_UART_INT_REG); + status &= ar933x_uart_read(up, AR933X_UART_INT_EN_REG); +@@ -468,7 +468,7 @@ static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id) + ar933x_uart_tx_chars(up); + } -- spin_unlock(&port->lock); -+ uart_port_unlock(port); +- spin_unlock(&up->port.lock); ++ uart_port_unlock(&up->port); - return IRQ_RETVAL(isr); + return IRQ_HANDLED; } -@@ -180,14 +180,14 @@ static int altera_jtaguart_startup(struct uart_port *port) +@@ -485,7 +485,7 @@ static int ar933x_uart_startup(struct uart_port *port) + if (ret) return ret; - } -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); +- spin_lock_irqsave(&up->port.lock, flags); ++ uart_port_lock_irqsave(&up->port, &flags); - /* Enable RX interrupts now */ - port->read_status_mask = ALTERA_JTAGUART_CONTROL_RE_MSK; - writel(port->read_status_mask, - port->membase + ALTERA_JTAGUART_CONTROL_REG); + /* Enable HOST interrupts */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, +@@ -498,7 +498,7 @@ static int ar933x_uart_startup(struct uart_port *port) + /* Enable RX interrupts */ + ar933x_uart_start_rx_interrupt(up); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); +- spin_unlock_irqrestore(&up->port.lock, flags); ++ uart_port_unlock_irqrestore(&up->port, flags); return 0; } -@@ -196,14 +196,14 @@ static void altera_jtaguart_shutdown(struct uart_port *port) - { - unsigned long flags; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); +@@ -632,9 +632,9 @@ static void ar933x_uart_console_write(struct console *co, const char *s, + if (up->port.sysrq) + locked = 0; + else if (oops_in_progress) +- locked = spin_trylock(&up->port.lock); ++ locked = uart_port_trylock(&up->port); + else +- 
spin_lock(&up->port.lock); ++ uart_port_lock(&up->port); - /* Disable all interrupts now */ - port->read_status_mask = 0; - writel(port->read_status_mask, - port->membase + ALTERA_JTAGUART_CONTROL_REG); + /* + * First save the IER then disable the interrupts +@@ -654,7 +654,7 @@ static void ar933x_uart_console_write(struct console *co, const char *s, + ar933x_uart_write(up, AR933X_UART_INT_REG, AR933X_UART_INT_ALLINTS); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); + if (locked) +- spin_unlock(&up->port.lock); ++ uart_port_unlock(&up->port); - free_irq(port->irq, port); + local_irq_restore(flags); } -@@ -264,33 +264,33 @@ static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c - unsigned long flags; - u32 status; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - while (!altera_jtaguart_tx_space(port, &status)) { -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); +-- +2.51.0 + +From 8c4a4930491ddfdf27bdbe80f816633b888525f8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:37 +0206 +Subject: [PATCH 045/213] serial: arc_uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
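+
+The trylock conversion in the ar933x console write earlier in this
+series preserves the standard console locking idiom: if an oops is in
+progress, the port lock may already be held by the crashing context,
+so the writer must not block on it. Schematically (a sketch of the
+idiom, not code from this patch):
+
+    int locked = 1;
+
+    if (up->port.sysrq)
+        locked = 0;                             /* lock already held */
+    else if (oops_in_progress)
+        locked = uart_port_trylock(&up->port);  /* never block in panic */
+    else
+        uart_port_lock(&up->port);
+
+    /* ... emit characters ... */
+
+    if (locked)
+        uart_port_unlock(&up->port);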
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-21-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/arc_uart.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c +index ad4ae19b6ce3..1aa5b2b49c26 100644 +--- a/drivers/tty/serial/arc_uart.c ++++ b/drivers/tty/serial/arc_uart.c +@@ -279,9 +279,9 @@ static irqreturn_t arc_serial_isr(int irq, void *dev_id) + if (status & RXIENB) { - if ((status & ALTERA_JTAGUART_CONTROL_AC_MSK) == 0) { - return; /* no connection activity */ - } + /* already in ISR, no need of xx_irqsave */ +- spin_lock(&port->lock); ++ uart_port_lock(port); + arc_serial_rx_chars(port, status); +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + } - cpu_relax(); -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - } - writel(c, port->membase + ALTERA_JTAGUART_DATA_REG); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - #else - static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c) - { - unsigned long flags; + if ((status & TXIENB) && (status & TXEMPTY)) { +@@ -291,12 +291,12 @@ static irqreturn_t arc_serial_isr(int irq, void *dev_id) + */ + UART_TX_IRQ_DISABLE(port); -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - while (!altera_jtaguart_tx_space(port, NULL)) { -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - cpu_relax(); -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - } - writel(c, port->membase + ALTERA_JTAGUART_DATA_REG); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - #endif +- spin_lock(&port->lock); ++ uart_port_lock(port); -diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c -index a9c419421..77835ac68 100644 ---- a/drivers/tty/serial/altera_uart.c -+++ b/drivers/tty/serial/altera_uart.c -@@ -164,13 +164,13 @@ static void altera_uart_break_ctl(struct uart_port *port, int break_state) - struct altera_uart *pp = container_of(port, struct altera_uart, port); - unsigned long flags; + if (!uart_tx_stopped(port)) + arc_serial_tx_chars(port); -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - if (break_state == -1) - pp->imr |= ALTERA_UART_CONTROL_TRBK_MSK; - else - pp->imr &= ~ALTERA_UART_CONTROL_TRBK_MSK; - altera_uart_update_ctrl_reg(pp); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } +- spin_unlock(&port->lock); ++ uart_port_unlock(port); + } - static void altera_uart_set_termios(struct uart_port *port, -@@ -187,10 +187,10 @@ static void altera_uart_set_termios(struct uart_port *port, - tty_termios_copy_hw(termios, old); - tty_termios_encode_baud_rate(termios, baud, baud); + return IRQ_HANDLED; +@@ -366,7 +366,7 @@ arc_serial_set_termios(struct uart_port *port, struct ktermios *new, + uartl = hw_val & 0xFF; + uarth = (hw_val >> 8) & 0xFF; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - uart_update_timeout(port, termios->c_cflag, baud); - altera_uart_writel(port, baudclk, ALTERA_UART_DIVISOR_REG); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - /* - * FIXME: 
port->read_status_mask and port->ignore_status_mask -@@ -264,12 +264,12 @@ static irqreturn_t altera_uart_interrupt(int irq, void *data) + UART_ALL_IRQ_DISABLE(port); - isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr; +@@ -391,7 +391,7 @@ arc_serial_set_termios(struct uart_port *port, struct ktermios *new, + + uart_update_timeout(port, new->c_cflag, baud); -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - if (isr & ALTERA_UART_STATUS_RRDY_MSK) - altera_uart_rx_chars(port); - if (isr & ALTERA_UART_STATUS_TRDY_MSK) - altera_uart_tx_chars(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - - return IRQ_RETVAL(isr); } -@@ -313,13 +313,13 @@ static int altera_uart_startup(struct uart_port *port) - } - } + + static const char *arc_serial_type(struct uart_port *port) +@@ -521,9 +521,9 @@ static void arc_serial_console_write(struct console *co, const char *s, + struct uart_port *port = &arc_uart_ports[co->index].port; + unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - - /* Enable RX interrupts now */ - pp->imr = ALTERA_UART_CONTROL_RRDY_MSK; - altera_uart_update_ctrl_reg(pp); - + uart_console_write(port, s, count, arc_serial_console_putchar); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - - return 0; } -@@ -329,13 +329,13 @@ static void altera_uart_shutdown(struct uart_port *port) - struct altera_uart *pp = container_of(port, struct altera_uart, port); + + static struct console arc_console = { +-- +2.51.0 + +From 380c3f9aa0b8011eb0a7ccfadcc10dda24b00cbe Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:38 +0206 +Subject: [PATCH 046/213] serial: atmel: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
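+
+The only signature change to watch for is the irqsave variant: the
+wrapper is a real function rather than a macro, so flags is passed by
+address. A schematic before/after (register programming elided):
+
+	/* before */
+	spin_lock_irqsave(&port->lock, flags);
+	/* ... program UART registers ... */
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	/* after: note &flags on the lock side */
+	uart_port_lock_irqsave(port, &flags);
+	/* ... program UART registers ... */
+	uart_port_unlock_irqrestore(port, flags);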
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-22-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/atmel_serial.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c +index 85559d9b35d8..32954d7976a5 100644 +--- a/drivers/tty/serial/atmel_serial.c ++++ b/drivers/tty/serial/atmel_serial.c +@@ -861,7 +861,7 @@ static void atmel_complete_tx_dma(void *arg) + struct dma_chan *chan = atmel_port->chan_tx; unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - /* Disable all interrupts now */ - pp->imr = 0; - altera_uart_update_ctrl_reg(pp); + if (chan) + dmaengine_terminate_all(chan); +@@ -893,7 +893,7 @@ static void atmel_complete_tx_dma(void *arg) + atmel_port->tx_done_mask); + } - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); + } - if (port->irq) - free_irq(port->irq, port); -diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c -index b5a7404cb..eabbf8afc 100644 ---- a/drivers/tty/serial/amba-pl010.c -+++ b/drivers/tty/serial/amba-pl010.c -@@ -207,7 +207,7 @@ static irqreturn_t pl010_int(int irq, void *dev_id) - unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT; - int handled = 0; + static void atmel_release_tx_dma(struct uart_port *port) +@@ -1711,9 +1711,9 @@ static void atmel_tasklet_rx_func(struct tasklet_struct *t) + struct uart_port *port = &atmel_port->uart; + /* The interrupt handler does not take the lock */ - spin_lock(&port->lock); + uart_port_lock(port); - - status = readb(port->membase + UART010_IIR); - if (status) { -@@ -228,7 +228,7 @@ static irqreturn_t pl010_int(int irq, void *dev_id) - handled = 1; - } - + atmel_port->schedule_rx(port); - spin_unlock(&port->lock); + uart_port_unlock(port); - - return IRQ_RETVAL(handled); } -@@ -270,14 +270,14 @@ static void pl010_break_ctl(struct uart_port *port, int break_state) - unsigned long flags; - unsigned int lcr_h; -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - lcr_h = readb(port->membase + UART010_LCRH); - if (break_state == -1) - lcr_h |= UART01x_LCRH_BRK; - else - lcr_h &= ~UART01x_LCRH_BRK; - writel(lcr_h, port->membase + UART010_LCRH); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); + static void atmel_tasklet_tx_func(struct tasklet_struct *t) +@@ -1723,9 +1723,9 @@ static void atmel_tasklet_tx_func(struct tasklet_struct *t) + struct uart_port *port = &atmel_port->uart; + + /* The interrupt handler does not take the lock */ +- spin_lock(&port->lock); ++ uart_port_lock(port); + atmel_port->schedule_tx(port); +- spin_unlock(&port->lock); ++ uart_port_unlock(port); } - static int pl010_startup(struct uart_port *port) -@@ -385,7 +385,7 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, - if (port->fifosize > 1) - lcr_h |= UART01x_LCRH_FEN; + static void atmel_init_property(struct atmel_uart_port *atmel_port, +@@ -2175,7 +2175,7 @@ static void atmel_set_termios(struct uart_port *port, + } else + mode |= ATMEL_US_PAR_NONE; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - /* - * Update the per-port timeout. 
-@@ -438,22 +438,22 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, - writel(lcr_h, port->membase + UART010_LCRH); - writel(old_cr, port->membase + UART010_CR); + port->read_status_mask = ATMEL_US_OVRE; + if (termios->c_iflag & INPCK) +@@ -2377,22 +2377,22 @@ static void atmel_set_termios(struct uart_port *port, + else + atmel_disable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } - static void pl010_set_ldisc(struct uart_port *port, struct ktermios *termios) + static void atmel_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (termios->c_line == N_PPS) { port->flags |= UPF_HARDPPS_CD; - spin_lock_irq(&port->lock); + uart_port_lock_irq(port); - pl010_enable_ms(port); + atmel_enable_ms(port); - spin_unlock_irq(&port->lock); + uart_port_unlock_irq(port); } else { @@ -2845,520 +4812,322 @@ index b5a7404cb..eabbf8afc 100644 if (!UART_ENABLE_MS(port, termios->c_cflag)) { - spin_lock_irq(&port->lock); + uart_port_lock_irq(port); - pl010_disable_ms(port); + atmel_disable_ms(port); - spin_unlock_irq(&port->lock); + uart_port_unlock_irq(port); } } } -diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c -index bb31ac9ca..8b7cf07e4 100644 ---- a/drivers/tty/serial/amba-pl011.c -+++ b/drivers/tty/serial/amba-pl011.c -@@ -2335,13 +2335,10 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) - - clk_enable(uap->clk); - -- local_irq_save(flags); -- if (uap->port.sysrq) -- locked = 0; -- else if (oops_in_progress) -- locked = uart_port_trylock(&uap->port); -+ if (uap->port.sysrq || oops_in_progress) -+ locked = uart_port_trylock_irqsave(&uap->port, &flags); - else -- uart_port_lock(&uap->port); -+ uart_port_lock_irqsave(&uap->port, &flags); +-- +2.51.0 + +From 951d66789a0e70d046b5e82c4693f4e0de025636 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:39 +0206 +Subject: [PATCH 047/213] serial: bcm63xx-uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
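+
+Console write() callbacks keep their oops-safe locking idiom, merely
+expressed through the wrappers. A condensed sketch of the shape several
+drivers in this series use (the foo_* names are schematic):
+
+	static void foo_console_write(struct console *co, const char *s,
+				      unsigned int count)
+	{
+		struct uart_port *port = foo_console_to_port(co);
+		int locked = 1;
+
+		if (port->sysrq)
+			locked = 0;	/* interrupt handler holds the lock */
+		else if (oops_in_progress)
+			locked = uart_port_trylock(port);
+		else
+			uart_port_lock(port);
+
+		uart_console_write(port, s, count, foo_console_putchar);
+
+		if (locked)
+			uart_port_unlock(port);
+	}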
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Florian Fainelli +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-23-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/bcm63xx_uart.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c +index 44c27e5cefbc..b104c36ce5c0 100644 +--- a/drivers/tty/serial/bcm63xx_uart.c ++++ b/drivers/tty/serial/bcm63xx_uart.c +@@ -201,7 +201,7 @@ static void bcm_uart_break_ctl(struct uart_port *port, int ctl) + unsigned long flags; + unsigned int val; - /* - * First save the CR then disable the interrupts -@@ -2367,8 +2364,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) - pl011_write(old_cr, uap, REG_CR); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - if (locked) -- uart_port_unlock(&uap->port); -- local_irq_restore(flags); -+ uart_port_unlock_irqrestore(&uap->port, flags); + val = bcm_uart_readl(port, UART_CTL_REG); + if (ctl) +@@ -210,7 +210,7 @@ static void bcm_uart_break_ctl(struct uart_port *port, int ctl) + val &= ~UART_CTL_XMITBRK_MASK; + bcm_uart_writel(port, val, UART_CTL_REG); - clk_disable(uap->clk); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); } -diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c -index d3cb341f2..364599f25 100644 ---- a/drivers/tty/serial/apbuart.c -+++ b/drivers/tty/serial/apbuart.c -@@ -133,7 +133,7 @@ static irqreturn_t apbuart_int(int irq, void *dev_id) - struct uart_port *port = dev_id; - unsigned int status; + /* +@@ -335,7 +335,7 @@ static irqreturn_t bcm_uart_interrupt(int irq, void *dev_id) + unsigned int irqstat; + + port = dev_id; - spin_lock(&port->lock); + uart_port_lock(port); - status = UART_GET_STATUS(port); - if (status & UART_STATUS_DR) -@@ -141,7 +141,7 @@ static irqreturn_t apbuart_int(int irq, void *dev_id) - if (status & UART_STATUS_THE) - apbuart_tx_chars(port); + irqstat = bcm_uart_readl(port, UART_IR_REG); + if (irqstat & UART_RX_INT_STAT) +@@ -356,7 +356,7 @@ static irqreturn_t bcm_uart_interrupt(int irq, void *dev_id) + estat & UART_EXTINP_DCD_MASK); + } - spin_unlock(&port->lock); + uart_port_unlock(port); - return IRQ_HANDLED; } -@@ -228,7 +228,7 @@ static void apbuart_set_termios(struct uart_port *port, - if (termios->c_cflag & CRTSCTS) - cr |= UART_CTRL_FL; + +@@ -454,9 +454,9 @@ static void bcm_uart_shutdown(struct uart_port *port) + { + unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - - /* Update the per-port timeout. 
*/ - uart_update_timeout(port, termios->c_cflag, baud); -@@ -251,7 +251,7 @@ static void apbuart_set_termios(struct uart_port *port, - UART_PUT_SCAL(port, quot); - UART_PUT_CTRL(port, cr); - + bcm_uart_writel(port, 0, UART_IR_REG); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - } - static const char *apbuart_type(struct uart_port *port) -diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c -index 924c1a893..ffd234673 100644 ---- a/drivers/tty/serial/ar933x_uart.c -+++ b/drivers/tty/serial/ar933x_uart.c -@@ -133,9 +133,9 @@ static unsigned int ar933x_uart_tx_empty(struct uart_port *port) + bcm_uart_disable(port); + bcm_uart_flush(port); +@@ -473,7 +473,7 @@ static void bcm_uart_set_termios(struct uart_port *port, struct ktermios *new, unsigned long flags; - unsigned int rdata; + int tries; -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); - rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG); -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - return (rdata & AR933X_UART_DATA_TX_CSR) ? 0 : TIOCSER_TEMT; - } -@@ -220,14 +220,14 @@ static void ar933x_uart_break_ctl(struct uart_port *port, int break_state) - container_of(port, struct ar933x_uart_port, port); - unsigned long flags; + /* Drain the hot tub fully before we power it off for the winter. */ + for (tries = 3; !bcm_uart_tx_empty(port) && tries; tries--) +@@ -549,7 +549,7 @@ static void bcm_uart_set_termios(struct uart_port *port, struct ktermios *new, -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); - if (break_state == -1) - ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, - AR933X_UART_CS_TX_BREAK); - else - ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG, - AR933X_UART_CS_TX_BREAK); -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); + uart_update_timeout(port, new->c_cflag, baud); + bcm_uart_enable(port); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); } /* -@@ -318,7 +318,7 @@ static void ar933x_uart_set_termios(struct uart_port *port, - * Ok, we're now changing the port state. Do it with - * interrupts disabled. 
- */ -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); - - /* disable the UART */ - ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG, -@@ -352,7 +352,7 @@ static void ar933x_uart_set_termios(struct uart_port *port, - AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, - AR933X_UART_CS_IF_MODE_DCE << AR933X_UART_CS_IF_MODE_S); - -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); - - if (tty_termios_baud_rate(new)) - tty_termios_encode_baud_rate(new, baud, baud); -@@ -450,7 +450,7 @@ static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id) - if ((status & AR933X_UART_CS_HOST_INT) == 0) - return IRQ_NONE; - -- spin_lock(&up->port.lock); -+ uart_port_lock(&up->port); - - status = ar933x_uart_read(up, AR933X_UART_INT_REG); - status &= ar933x_uart_read(up, AR933X_UART_INT_EN_REG); -@@ -468,7 +468,7 @@ static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id) - ar933x_uart_tx_chars(up); - } - -- spin_unlock(&up->port.lock); -+ uart_port_unlock(&up->port); - - return IRQ_HANDLED; - } -@@ -485,7 +485,7 @@ static int ar933x_uart_startup(struct uart_port *port) - if (ret) - return ret; - -- spin_lock_irqsave(&up->port.lock, flags); -+ uart_port_lock_irqsave(&up->port, &flags); - - /* Enable HOST interrupts */ - ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, -@@ -498,7 +498,7 @@ static int ar933x_uart_startup(struct uart_port *port) - /* Enable RX interrupts */ - ar933x_uart_start_rx_interrupt(up); - -- spin_unlock_irqrestore(&up->port.lock, flags); -+ uart_port_unlock_irqrestore(&up->port, flags); - - return 0; - } -@@ -632,9 +632,9 @@ static void ar933x_uart_console_write(struct console *co, const char *s, - if (up->port.sysrq) +@@ -715,9 +715,9 @@ static void bcm_console_write(struct console *co, const char *s, + /* bcm_uart_interrupt() already took the lock */ locked = 0; - else if (oops_in_progress) -- locked = spin_trylock(&up->port.lock); -+ locked = uart_port_trylock(&up->port); - else -- spin_lock(&up->port.lock); -+ uart_port_lock(&up->port); + } else if (oops_in_progress) { +- locked = spin_trylock(&port->lock); ++ locked = uart_port_trylock(port); + } else { +- spin_lock(&port->lock); ++ uart_port_lock(port); + locked = 1; + } - /* - * First save the IER then disable the interrupts -@@ -654,7 +654,7 @@ static void ar933x_uart_console_write(struct console *co, const char *s, - ar933x_uart_write(up, AR933X_UART_INT_REG, AR933X_UART_INT_ALLINTS); +@@ -728,7 +728,7 @@ static void bcm_console_write(struct console *co, const char *s, + wait_for_xmitr(port); if (locked) -- spin_unlock(&up->port.lock); -+ uart_port_unlock(&up->port); - - local_irq_restore(flags); - } -diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c -index ad4ae19b6..1aa5b2b49 100644 ---- a/drivers/tty/serial/arc_uart.c -+++ b/drivers/tty/serial/arc_uart.c -@@ -279,9 +279,9 @@ static irqreturn_t arc_serial_isr(int irq, void *dev_id) - if (status & RXIENB) { - - /* already in ISR, no need of xx_irqsave */ -- spin_lock(&port->lock); -+ uart_port_lock(port); - arc_serial_rx_chars(port, status); - spin_unlock(&port->lock); + uart_port_unlock(port); - } + local_irq_restore(flags); + } - if ((status & TXIENB) && (status & TXEMPTY)) { -@@ -291,12 +291,12 @@ static irqreturn_t arc_serial_isr(int irq, void *dev_id) - */ - UART_TX_IRQ_DISABLE(port); +-- +2.51.0 + +From 0ec6c15f657569d49e94902136c4dde55317b21c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:40 +0206 +Subject: [PATCH 
048/213] serial: cpm_uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-24-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/cpm_uart.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/tty/serial/cpm_uart.c b/drivers/tty/serial/cpm_uart.c +index 626423022d62..be4af6eda4c2 100644 +--- a/drivers/tty/serial/cpm_uart.c ++++ b/drivers/tty/serial/cpm_uart.c +@@ -569,7 +569,7 @@ static void cpm_uart_set_termios(struct uart_port *port, + if ((termios->c_cflag & CREAD) == 0) + port->read_status_mask &= ~BD_SC_EMPTY; -- spin_lock(&port->lock); -+ uart_port_lock(port); +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); - if (!uart_tx_stopped(port)) - arc_serial_tx_chars(port); + if (IS_SMC(pinfo)) { + unsigned int bits = tty_get_frame_size(termios->c_cflag); +@@ -609,7 +609,7 @@ static void cpm_uart_set_termios(struct uart_port *port, + clk_set_rate(pinfo->clk, baud); + else + cpm_setbrg(pinfo->brg - 1, baud); +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + } -- spin_unlock(&port->lock); -+ uart_port_unlock(port); + static const char *cpm_uart_type(struct uart_port *port) +@@ -1386,9 +1386,9 @@ static void cpm_uart_console_write(struct console *co, const char *s, + cpm_uart_early_write(pinfo, s, count, true); + local_irq_restore(flags); + } else { +- spin_lock_irqsave(&pinfo->port.lock, flags); ++ uart_port_lock_irqsave(&pinfo->port, &flags); + cpm_uart_early_write(pinfo, s, count, true); +- spin_unlock_irqrestore(&pinfo->port.lock, flags); ++ uart_port_unlock_irqrestore(&pinfo->port, flags); } + } - return IRQ_HANDLED; -@@ -366,7 +366,7 @@ arc_serial_set_termios(struct uart_port *port, struct ktermios *new, - uartl = hw_val & 0xFF; - uarth = (hw_val >> 8) & 0xFF; +-- +2.51.0 + +From b6e89a9d619318c8676f75e2ce5ca556d6d2f6da Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:41 +0206 +Subject: [PATCH 049/213] serial: digicolor: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. 
getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Acked-by: Baruch Siach +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-25-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/digicolor-usart.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c +index 128b5479e813..5004125f3045 100644 +--- a/drivers/tty/serial/digicolor-usart.c ++++ b/drivers/tty/serial/digicolor-usart.c +@@ -133,7 +133,7 @@ static void digicolor_uart_rx(struct uart_port *port) + { + unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - UART_ALL_IRQ_DISABLE(port); - -@@ -391,7 +391,7 @@ arc_serial_set_termios(struct uart_port *port, struct ktermios *new, - - uart_update_timeout(port, new->c_cflag, baud); + while (1) { + u8 status, ch, ch_flag; +@@ -172,7 +172,7 @@ static void digicolor_uart_rx(struct uart_port *port) + ch_flag); + } - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); - } - static const char *arc_serial_type(struct uart_port *port) -@@ -521,9 +521,9 @@ static void arc_serial_console_write(struct console *co, const char *s, - struct uart_port *port = &arc_uart_ports[co->index].port; - unsigned long flags; + tty_flip_buffer_push(&port->state->port); + } +@@ -185,7 +185,7 @@ static void digicolor_uart_tx(struct uart_port *port) + if (digicolor_uart_tx_full(port)) + return; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - uart_console_write(port, s, count, arc_serial_console_putchar); + + if (port->x_char) { + writeb_relaxed(port->x_char, port->membase + UA_EMI_REC); +@@ -211,7 +211,7 @@ static void digicolor_uart_tx(struct uart_port *port) + uart_write_wakeup(port); + + out: - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } - static struct console arc_console = { -diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c -index bcca5627a..5a3f2fc47 100644 ---- a/drivers/tty/serial/atmel_serial.c -+++ b/drivers/tty/serial/atmel_serial.c -@@ -861,7 +861,7 @@ static void atmel_complete_tx_dma(void *arg) - struct dma_chan *chan = atmel_port->chan_tx; - unsigned long flags; + static irqreturn_t digicolor_uart_int(int irq, void *dev_id) +@@ -333,7 +333,7 @@ static void 
digicolor_uart_set_termios(struct uart_port *port, + port->ignore_status_mask |= UA_STATUS_OVERRUN_ERR + | UA_STATUS_PARITY_ERR | UA_STATUS_FRAME_ERR; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); - if (chan) - dmaengine_terminate_all(chan); -@@ -893,7 +893,7 @@ static void atmel_complete_tx_dma(void *arg) - atmel_port->tx_done_mask); - } + uart_update_timeout(port, termios->c_cflag, baud); + +@@ -341,7 +341,7 @@ static void digicolor_uart_set_termios(struct uart_port *port, + writeb_relaxed(divisor & 0xff, port->membase + UA_HBAUD_LO); + writeb_relaxed(divisor >> 8, port->membase + UA_HBAUD_HI); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } - static void atmel_release_tx_dma(struct uart_port *port) -@@ -1711,9 +1711,9 @@ static void atmel_tasklet_rx_func(struct tasklet_struct *t) - struct uart_port *port = &atmel_port->uart; - - /* The interrupt handler does not take the lock */ -- spin_lock(&port->lock); -+ uart_port_lock(port); - atmel_port->schedule_rx(port); -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - } - - static void atmel_tasklet_tx_func(struct tasklet_struct *t) -@@ -1723,9 +1723,9 @@ static void atmel_tasklet_tx_func(struct tasklet_struct *t) - struct uart_port *port = &atmel_port->uart; - - /* The interrupt handler does not take the lock */ -- spin_lock(&port->lock); -+ uart_port_lock(port); - atmel_port->schedule_tx(port); -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - } - - static void atmel_init_property(struct atmel_uart_port *atmel_port, -@@ -2175,7 +2175,7 @@ static void atmel_set_termios(struct uart_port *port, - } else - mode |= ATMEL_US_PAR_NONE; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - port->read_status_mask = ATMEL_US_OVRE; - if (termios->c_iflag & INPCK) -@@ -2377,22 +2377,22 @@ static void atmel_set_termios(struct uart_port *port, - else - atmel_disable_ms(port); - -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - static void atmel_set_ldisc(struct uart_port *port, struct ktermios *termios) - { - if (termios->c_line == N_PPS) { - port->flags |= UPF_HARDPPS_CD; -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - atmel_enable_ms(port); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - } else { - port->flags &= ~UPF_HARDPPS_CD; - if (!UART_ENABLE_MS(port, termios->c_cflag)) { -- spin_lock_irq(&port->lock); -+ uart_port_lock_irq(port); - atmel_disable_ms(port); -- spin_unlock_irq(&port->lock); -+ uart_port_unlock_irq(port); - } - } - } -diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c -index 44c27e5ce..b104c36ce 100644 ---- a/drivers/tty/serial/bcm63xx_uart.c -+++ b/drivers/tty/serial/bcm63xx_uart.c -@@ -201,7 +201,7 @@ static void bcm_uart_break_ctl(struct uart_port *port, int ctl) - unsigned long flags; - unsigned int val; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - val = bcm_uart_readl(port, UART_CTL_REG); - if (ctl) -@@ -210,7 +210,7 @@ static void bcm_uart_break_ctl(struct uart_port *port, int ctl) - val &= ~UART_CTL_XMITBRK_MASK; - bcm_uart_writel(port, val, UART_CTL_REG); - -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - /* -@@ -335,7 +335,7 @@ static irqreturn_t bcm_uart_interrupt(int irq, void *dev_id) - unsigned int irqstat; - - port = dev_id; -- spin_lock(&port->lock); -+ uart_port_lock(port); - - irqstat 
= bcm_uart_readl(port, UART_IR_REG); - if (irqstat & UART_RX_INT_STAT) -@@ -356,7 +356,7 @@ static irqreturn_t bcm_uart_interrupt(int irq, void *dev_id) - estat & UART_EXTINP_DCD_MASK); - } - -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - return IRQ_HANDLED; - } - -@@ -454,9 +454,9 @@ static void bcm_uart_shutdown(struct uart_port *port) - { - unsigned long flags; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - bcm_uart_writel(port, 0, UART_IR_REG); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - - bcm_uart_disable(port); - bcm_uart_flush(port); -@@ -473,7 +473,7 @@ static void bcm_uart_set_termios(struct uart_port *port, struct ktermios *new, - unsigned long flags; - int tries; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - /* Drain the hot tub fully before we power it off for the winter. */ - for (tries = 3; !bcm_uart_tx_empty(port) && tries; tries--) -@@ -549,7 +549,7 @@ static void bcm_uart_set_termios(struct uart_port *port, struct ktermios *new, - - uart_update_timeout(port, new->c_cflag, baud); - bcm_uart_enable(port); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - /* -@@ -715,9 +715,9 @@ static void bcm_console_write(struct console *co, const char *s, - /* bcm_uart_interrupt() already took the lock */ - locked = 0; - } else if (oops_in_progress) { -- locked = spin_trylock(&port->lock); -+ locked = uart_port_trylock(port); - } else { -- spin_lock(&port->lock); -+ uart_port_lock(port); - locked = 1; - } - -@@ -728,7 +728,7 @@ static void bcm_console_write(struct console *co, const char *s, - wait_for_xmitr(port); - - if (locked) -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - local_irq_restore(flags); - } - -diff --git a/drivers/tty/serial/cpm_uart.c b/drivers/tty/serial/cpm_uart.c -index 626423022..be4af6eda 100644 ---- a/drivers/tty/serial/cpm_uart.c -+++ b/drivers/tty/serial/cpm_uart.c -@@ -569,7 +569,7 @@ static void cpm_uart_set_termios(struct uart_port *port, - if ((termios->c_cflag & CREAD) == 0) - port->read_status_mask &= ~BD_SC_EMPTY; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - if (IS_SMC(pinfo)) { - unsigned int bits = tty_get_frame_size(termios->c_cflag); -@@ -609,7 +609,7 @@ static void cpm_uart_set_termios(struct uart_port *port, - clk_set_rate(pinfo->clk, baud); - else - cpm_setbrg(pinfo->brg - 1, baud); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - static const char *cpm_uart_type(struct uart_port *port) -@@ -1386,9 +1386,9 @@ static void cpm_uart_console_write(struct console *co, const char *s, - cpm_uart_early_write(pinfo, s, count, true); - local_irq_restore(flags); - } else { -- spin_lock_irqsave(&pinfo->port.lock, flags); -+ uart_port_lock_irqsave(&pinfo->port, &flags); - cpm_uart_early_write(pinfo, s, count, true); -- spin_unlock_irqrestore(&pinfo->port.lock, flags); -+ uart_port_unlock_irqrestore(&pinfo->port, flags); - } - } - -diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c -index 128b5479e..5004125f3 100644 ---- a/drivers/tty/serial/digicolor-usart.c -+++ b/drivers/tty/serial/digicolor-usart.c -@@ -133,7 +133,7 @@ static void digicolor_uart_rx(struct uart_port *port) - { - unsigned long flags; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - while (1) { - u8 status, ch, ch_flag; -@@ 
-172,7 +172,7 @@ static void digicolor_uart_rx(struct uart_port *port) - ch_flag); - } - -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - - tty_flip_buffer_push(&port->state->port); - } -@@ -185,7 +185,7 @@ static void digicolor_uart_tx(struct uart_port *port) - if (digicolor_uart_tx_full(port)) - return; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - if (port->x_char) { - writeb_relaxed(port->x_char, port->membase + UA_EMI_REC); -@@ -211,7 +211,7 @@ static void digicolor_uart_tx(struct uart_port *port) - uart_write_wakeup(port); - - out: -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - static irqreturn_t digicolor_uart_int(int irq, void *dev_id) -@@ -333,7 +333,7 @@ static void digicolor_uart_set_termios(struct uart_port *port, - port->ignore_status_mask |= UA_STATUS_OVERRUN_ERR - | UA_STATUS_PARITY_ERR | UA_STATUS_FRAME_ERR; - -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - - uart_update_timeout(port, termios->c_cflag, baud); - -@@ -341,7 +341,7 @@ static void digicolor_uart_set_termios(struct uart_port *port, - writeb_relaxed(divisor & 0xff, port->membase + UA_HBAUD_LO); - writeb_relaxed(divisor >> 8, port->membase + UA_HBAUD_HI); - -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - } - - static const char *digicolor_uart_type(struct uart_port *port) -@@ -398,14 +398,14 @@ static void digicolor_uart_console_write(struct console *co, const char *c, - int locked = 1; + static const char *digicolor_uart_type(struct uart_port *port) +@@ -398,14 +398,14 @@ static void digicolor_uart_console_write(struct console *co, const char *c, + int locked = 1; if (oops_in_progress) - locked = spin_trylock_irqsave(&port->lock, flags); @@ -3375,8 +5144,49 @@ index 128b5479e..5004125f3 100644 /* Wait for transmitter to become empty */ do { +-- +2.51.0 + +From a81adb818e3a678163ec9a9d259a1bac1266e02b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:42 +0206 +Subject: [PATCH 050/213] serial: dz: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
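+
+Where a console write path previously open-coded local_irq_save() plus
+spin_trylock(), as amba-pl011 did, the series folds both into a single
+wrapper call; an illustrative before/after:
+
+	/* before */
+	local_irq_save(flags);
+	if (oops_in_progress)
+		locked = spin_trylock(&port->lock);
+	else
+		spin_lock(&port->lock);
+
+	/* after */
+	if (oops_in_progress)
+		locked = uart_port_trylock_irqsave(port, &flags);
+	else
+		uart_port_lock_irqsave(port, &flags);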
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-26-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/dz.c | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c -index 667f52e83..6df7af9ed 100644 +index 667f52e83277..6df7af9edc1c 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -268,9 +268,9 @@ static inline void dz_transmit_chars(struct dz_mux *mux) @@ -3500,8 +5310,49 @@ index 667f52e83..6df7af9ed 100644 do { trdy = dz_in(dport, DZ_CSR); +-- +2.51.0 + +From 48c933a3deb482d5ee5ec94aa5c65ff07dbcebd8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:43 +0206 +Subject: [PATCH 051/213] serial: linflexuart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-27-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/fsl_linflexuart.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c -index 249cb380c..7fa809a40 100644 +index 249cb380c3c6..7fa809a405e8 100644 --- a/drivers/tty/serial/fsl_linflexuart.c +++ b/drivers/tty/serial/fsl_linflexuart.c @@ -203,7 +203,7 @@ static irqreturn_t linflex_txint(int irq, void *dev_id) @@ -3607,8 +5458,49 @@ index 249cb380c..7fa809a40 100644 } /* +-- +2.51.0 + +From 1079bf8cad69f11aaca06af3e75005f7cc67d4cd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:44 +0206 +Subject: [PATCH 052/213] serial: fsl_lpuart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-28-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/fsl_lpuart.c | 88 ++++++++++++++++----------------- + 1 file changed, 44 insertions(+), 44 deletions(-) + diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c -index 8bd0f8e45..58c2c460c 100644 +index 70fa1e6aebe1..686bfb2cfe65 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -532,9 +532,9 @@ static void lpuart_dma_tx_complete(void *arg) @@ -3761,7 +5653,7 @@ index 8bd0f8e45..58c2c460c 100644 } } -@@ -1802,14 +1802,14 @@ static void lpuart_hw_setup(struct lpuart_port *sport) +@@ -1819,14 +1819,14 @@ static void lpuart_hw_setup(struct lpuart_port *sport) { unsigned long flags; @@ -3778,7 +5670,7 @@ index 8bd0f8e45..58c2c460c 100644 } static int lpuart_startup(struct uart_port *port) -@@ -1859,7 +1859,7 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) +@@ -1876,7 +1876,7 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) { unsigned long flags; @@ -3787,7 +5679,7 @@ index 8bd0f8e45..58c2c460c 100644 lpuart32_hw_disable(sport); -@@ -1869,7 +1869,7 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) +@@ -1886,7 +1886,7 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) lpuart32_setup_watermark_enable(sport); lpuart32_configure(sport); @@ -3796,7 +5688,7 @@ index 8bd0f8e45..58c2c460c 100644 } static int lpuart32_startup(struct uart_port *port) -@@ -1932,7 +1932,7 @@ static void lpuart_shutdown(struct uart_port *port) +@@ -1949,7 +1949,7 @@ static void lpuart_shutdown(struct uart_port *port) unsigned char temp; unsigned long flags; @@ -3805,7 +5697,7 @@ index 8bd0f8e45..58c2c460c 100644 /* disable Rx/Tx and interrupts */ temp = readb(port->membase + UARTCR2); -@@ -1940,7 +1940,7 @@ static void lpuart_shutdown(struct uart_port *port) +@@ -1957,7 +1957,7 @@ static void lpuart_shutdown(struct uart_port *port) UARTCR2_TIE | UARTCR2_TCIE | UARTCR2_RIE); writeb(temp, port->membase + UARTCR2); @@ -3814,7 +5706,7 @@ index 8bd0f8e45..58c2c460c 100644 lpuart_dma_shutdown(sport); } -@@ -1952,7 +1952,7 @@ static void lpuart32_shutdown(struct uart_port *port) +@@ -1969,7 +1969,7 @@ static void lpuart32_shutdown(struct uart_port *port) unsigned long temp; unsigned long flags; @@ -3823,7 +5715,7 @@ index 8bd0f8e45..58c2c460c 100644 /* clear status */ temp = lpuart32_read(&sport->port, UARTSTAT); -@@ -1969,7 +1969,7 @@ static void lpuart32_shutdown(struct uart_port *port) +@@ -1986,7 +1986,7 @@ static void lpuart32_shutdown(struct uart_port *port) UARTCTRL_TIE | UARTCTRL_TCIE | UARTCTRL_RIE | UARTCTRL_SBK); lpuart32_write(port, temp, UARTCTRL); @@ -3832,7 
+5724,7 @@ index 8bd0f8e45..58c2c460c 100644 lpuart_dma_shutdown(sport); } -@@ -2069,7 +2069,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2086,7 +2086,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(&sport->port); @@ -3841,7 +5733,7 @@ index 8bd0f8e45..58c2c460c 100644 sport->port.read_status_mask = 0; if (termios->c_iflag & INPCK) -@@ -2124,7 +2124,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2141,7 +2141,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, sport->lpuart_dma_rx_use = false; } @@ -3850,7 +5742,7 @@ index 8bd0f8e45..58c2c460c 100644 } static void __lpuart32_serial_setbrg(struct uart_port *port, -@@ -2304,7 +2304,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2321,7 +2321,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(&sport->port); @@ -3859,7 +5751,7 @@ index 8bd0f8e45..58c2c460c 100644 sport->port.read_status_mask = 0; if (termios->c_iflag & INPCK) -@@ -2362,7 +2362,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2379,7 +2379,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, sport->lpuart_dma_rx_use = false; } @@ -3868,7 +5760,7 @@ index 8bd0f8e45..58c2c460c 100644 } static const char *lpuart_type(struct uart_port *port) -@@ -2480,9 +2480,9 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) +@@ -2497,9 +2497,9 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) int locked = 1; if (oops_in_progress) @@ -3880,7 +5772,7 @@ index 8bd0f8e45..58c2c460c 100644 /* first save CR2 and then disable interrupts */ cr2 = old_cr2 = readb(sport->port.membase + UARTCR2); -@@ -2498,7 +2498,7 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) +@@ -2515,7 +2515,7 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) writeb(old_cr2, sport->port.membase + UARTCR2); if (locked) @@ -3889,7 +5781,7 @@ index 8bd0f8e45..58c2c460c 100644 } static void -@@ -2510,9 +2510,9 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) +@@ -2527,9 +2527,9 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) int locked = 1; if (oops_in_progress) @@ -3901,7 +5793,7 @@ index 8bd0f8e45..58c2c460c 100644 /* first save CR2 and then disable interrupts */ cr = old_cr = lpuart32_read(&sport->port, UARTCTRL); -@@ -2528,7 +2528,7 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) +@@ -2545,7 +2545,7 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) lpuart32_write(&sport->port, old_cr, UARTCTRL); if (locked) @@ -3910,7 +5802,7 @@ index 8bd0f8e45..58c2c460c 100644 } /* -@@ -3093,7 +3093,7 @@ static int lpuart_suspend(struct device *dev) +@@ -3110,7 +3110,7 @@ static int lpuart_suspend(struct device *dev) uart_suspend_port(&lpuart_reg, &sport->port); if (lpuart_uport_is_active(sport)) { @@ -3919,7 +5811,7 @@ index 8bd0f8e45..58c2c460c 100644 if (lpuart_is_32(sport)) { /* disable Rx/Tx and interrupts */ temp = lpuart32_read(&sport->port, UARTCTRL); -@@ -3105,7 +3105,7 @@ static int lpuart_suspend(struct device *dev) +@@ -3122,7 +3122,7 @@ static int lpuart_suspend(struct device *dev) temp &= ~(UARTCR2_TE | UARTCR2_TIE | UARTCR2_TCIE); writeb(temp, 
sport->port.membase + UARTCR2); } @@ -3928,7 +5820,7 @@ index 8bd0f8e45..58c2c460c 100644 if (sport->lpuart_dma_rx_use) { /* -@@ -3118,7 +3118,7 @@ static int lpuart_suspend(struct device *dev) +@@ -3135,7 +3135,7 @@ static int lpuart_suspend(struct device *dev) lpuart_dma_rx_free(&sport->port); /* Disable Rx DMA to use UART port as wakeup source */ @@ -3937,7 +5829,7 @@ index 8bd0f8e45..58c2c460c 100644 if (lpuart_is_32(sport)) { temp = lpuart32_read(&sport->port, UARTBAUD); lpuart32_write(&sport->port, temp & ~UARTBAUD_RDMAE, -@@ -3127,11 +3127,11 @@ static int lpuart_suspend(struct device *dev) +@@ -3144,11 +3144,11 @@ static int lpuart_suspend(struct device *dev) writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_RDMAS, sport->port.membase + UARTCR5); } @@ -3951,7 +5843,7 @@ index 8bd0f8e45..58c2c460c 100644 if (lpuart_is_32(sport)) { temp = lpuart32_read(&sport->port, UARTBAUD); temp &= ~UARTBAUD_TDMAE; -@@ -3141,7 +3141,7 @@ static int lpuart_suspend(struct device *dev) +@@ -3158,7 +3158,7 @@ static int lpuart_suspend(struct device *dev) temp &= ~UARTCR5_TDMAS; writeb(temp, sport->port.membase + UARTCR5); } @@ -3960,8 +5852,49 @@ index 8bd0f8e45..58c2c460c 100644 sport->dma_tx_in_progress = false; dmaengine_terminate_sync(sport->dma_tx_chan); } +-- +2.51.0 + +From 67221436bc6358f77fb962ac85e251828dad8d84 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:45 +0206 +Subject: [PATCH 053/213] serial: icom: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
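+
+Within interrupt handlers the plain (non-irqsave) variants remain
+sufficient (the "already in ISR, no need of xx_irqsave" case noted in
+arc_uart). A minimal sketch with hypothetical names:
+
+	static irqreturn_t foo_uart_isr(int irq, void *dev_id)
+	{
+		struct uart_port *port = dev_id;
+
+		uart_port_lock(port);
+		/* read interrupt status, drain the RX/TX FIFOs */
+		uart_port_unlock(port);
+
+		return IRQ_HANDLED;
+	}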
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-29-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/icom.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c -index 819f957b6..a75eafbcb 100644 +index 819f957b6b84..a75eafbcbea3 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -929,7 +929,7 @@ static inline void check_modem_status(struct icom_port *icom_port) @@ -4075,11 +6008,52 @@ index 819f957b6..a75eafbcb 100644 } static const char *icom_type(struct uart_port *port) +-- +2.51.0 + +From 9da0cf6c543beb51d00b69e2a30d97a09993f5cf Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:46 +0206 +Subject: [PATCH 054/213] serial: imx: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
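+
+Deferred contexts that touch UART registers (tasklets, DMA completion
+callbacks, hrtimers) take the same wrappers. A condensed, hypothetical
+adaptation of the imx hrtimer pattern:
+
+	static enum hrtimer_restart foo_trigger_start_tx(struct hrtimer *t)
+	{
+		struct foo_port *sport = container_of(t, struct foo_port,
+						      trigger_start_tx);
+		unsigned long flags;
+
+		uart_port_lock_irqsave(&sport->port, &flags);
+		if (!uart_tx_stopped(&sport->port))
+			foo_start_tx(&sport->port);
+		uart_port_unlock_irqrestore(&sport->port, flags);
+
+		return HRTIMER_NORESTART;
+	}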
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-30-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/imx.c | 84 ++++++++++++++++++++-------------------- + 1 file changed, 42 insertions(+), 42 deletions(-) + diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c -index 349d4849b..3934ab8fe 100644 +index 60d48d857b1c..6db6daffde1a 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c -@@ -586,7 +586,7 @@ static void imx_uart_dma_tx_callback(void *data) +@@ -587,7 +587,7 @@ static void imx_uart_dma_tx_callback(void *data) unsigned long flags; u32 ucr1; @@ -4088,7 +6062,7 @@ index 349d4849b..3934ab8fe 100644 dma_unmap_sg(sport->port.dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE); -@@ -611,7 +611,7 @@ static void imx_uart_dma_tx_callback(void *data) +@@ -612,7 +612,7 @@ static void imx_uart_dma_tx_callback(void *data) imx_uart_writel(sport, ucr4, UCR4); } @@ -4097,7 +6071,7 @@ index 349d4849b..3934ab8fe 100644 } /* called with port.lock taken and irqs off */ -@@ -797,11 +797,11 @@ static irqreturn_t imx_uart_rtsint(int irq, void *dev_id) +@@ -798,11 +798,11 @@ static irqreturn_t imx_uart_rtsint(int irq, void *dev_id) struct imx_port *sport = dev_id; irqreturn_t ret; @@ -4111,7 +6085,7 @@ index 349d4849b..3934ab8fe 100644 return ret; } -@@ -810,9 +810,9 @@ static irqreturn_t imx_uart_txint(int irq, void *dev_id) +@@ -811,9 +811,9 @@ static irqreturn_t imx_uart_txint(int irq, void *dev_id) { struct imx_port *sport = dev_id; @@ -4123,7 +6097,7 @@ index 349d4849b..3934ab8fe 100644 return IRQ_HANDLED; } -@@ -926,11 +926,11 @@ static irqreturn_t imx_uart_rxint(int irq, void *dev_id) +@@ -927,11 +927,11 @@ static irqreturn_t imx_uart_rxint(int irq, void *dev_id) struct imx_port *sport = dev_id; irqreturn_t ret; @@ -4137,7 +6111,7 @@ index 349d4849b..3934ab8fe 100644 return ret; } -@@ -993,7 +993,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) +@@ -994,7 +994,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; @@ -4146,7 +6120,7 @@ index 349d4849b..3934ab8fe 100644 usr1 = imx_uart_readl(sport, USR1); usr2 = imx_uart_readl(sport, USR2); -@@ -1063,7 +1063,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) +@@ -1064,7 +1064,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) ret = IRQ_HANDLED; } @@ -4155,7 +6129,7 @@ index 349d4849b..3934ab8fe 100644 return ret; } -@@ -1146,7 +1146,7 @@ static void imx_uart_break_ctl(struct uart_port *port, int break_state) +@@ -1147,7 +1147,7 @@ static void imx_uart_break_ctl(struct uart_port *port, int break_state) unsigned long flags; u32 ucr1; @@ -4164,7 +6138,7 @@ index 349d4849b..3934ab8fe 100644 ucr1 = imx_uart_readl(sport, UCR1) & ~UCR1_SNDBRK; -@@ -1155,7 +1155,7 @@ static void imx_uart_break_ctl(struct uart_port *port, int break_state) +@@ -1156,7 +1156,7 @@ static void imx_uart_break_ctl(struct uart_port *port, int break_state) imx_uart_writel(sport, ucr1, UCR1); @@ -4173,7 +6147,7 @@ index 349d4849b..3934ab8fe 100644 } /* -@@ -1168,9 +1168,9 @@ static void imx_uart_timeout(struct timer_list *t) +@@ -1169,9 +1169,9 @@ static void imx_uart_timeout(struct timer_list *t) unsigned long flags; if (sport->port.state) { @@ -4185,7 +6159,7 @@ index 349d4849b..3934ab8fe 100644 mod_timer(&sport->timer, jiffies + MCTRL_TIMEOUT); } -@@ -1200,9 +1200,9 @@ static void 
imx_uart_dma_rx_callback(void *data) +@@ -1201,9 +1201,9 @@ static void imx_uart_dma_rx_callback(void *data) status = dmaengine_tx_status(chan, sport->rx_cookie, &state); if (status == DMA_ERROR) { @@ -4197,7 +6171,7 @@ index 349d4849b..3934ab8fe 100644 return; } -@@ -1231,9 +1231,9 @@ static void imx_uart_dma_rx_callback(void *data) +@@ -1232,9 +1232,9 @@ static void imx_uart_dma_rx_callback(void *data) r_bytes = rx_ring->head - rx_ring->tail; /* If we received something, check for 0xff flood */ @@ -4209,7 +6183,7 @@ index 349d4849b..3934ab8fe 100644 if (!(sport->port.ignore_status_mask & URXD_DUMMY_READ)) { -@@ -1491,7 +1491,7 @@ static int imx_uart_startup(struct uart_port *port) +@@ -1498,7 +1498,7 @@ static int imx_uart_startup(struct uart_port *port) if (!uart_console(port) && imx_uart_dma_init(sport) == 0) dma_is_inited = 1; @@ -4218,7 +6192,7 @@ index 349d4849b..3934ab8fe 100644 /* Reset fifo's and state machines */ imx_uart_soft_reset(sport); -@@ -1564,7 +1564,7 @@ static int imx_uart_startup(struct uart_port *port) +@@ -1571,7 +1571,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_disable_loopback_rs485(sport); @@ -4227,7 +6201,7 @@ index 349d4849b..3934ab8fe 100644 return 0; } -@@ -1589,21 +1589,21 @@ static void imx_uart_shutdown(struct uart_port *port) +@@ -1596,21 +1596,21 @@ static void imx_uart_shutdown(struct uart_port *port) sport->dma_is_rxing = 0; } @@ -4241,7 +6215,7 @@ index 349d4849b..3934ab8fe 100644 imx_uart_dma_exit(sport); } - mctrl_gpio_disable_ms(sport->gpios); + mctrl_gpio_disable_ms_sync(sport->gpios); - spin_lock_irqsave(&sport->port.lock, flags); + uart_port_lock_irqsave(&sport->port, &flags); @@ -4253,7 +6227,7 @@ index 349d4849b..3934ab8fe 100644 /* * Stop our timer. -@@ -1614,7 +1614,7 @@ static void imx_uart_shutdown(struct uart_port *port) +@@ -1621,7 +1621,7 @@ static void imx_uart_shutdown(struct uart_port *port) * Disable all interrupts, port and break condition. 
*/ @@ -4262,7 +6236,7 @@ index 349d4849b..3934ab8fe 100644 ucr1 = imx_uart_readl(sport, UCR1); ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_RXDMAEN | -@@ -1636,7 +1636,7 @@ static void imx_uart_shutdown(struct uart_port *port) +@@ -1643,7 +1643,7 @@ static void imx_uart_shutdown(struct uart_port *port) ucr4 &= ~UCR4_TCEN; imx_uart_writel(sport, ucr4, UCR4); @@ -4271,7 +6245,7 @@ index 349d4849b..3934ab8fe 100644 clk_disable_unprepare(sport->clk_per); clk_disable_unprepare(sport->clk_ipg); -@@ -1699,7 +1699,7 @@ imx_uart_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -1706,7 +1706,7 @@ imx_uart_set_termios(struct uart_port *port, struct ktermios *termios, baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 16); quot = uart_get_divisor(port, baud); @@ -4280,7 +6254,7 @@ index 349d4849b..3934ab8fe 100644 /* * Read current UCR2 and save it for future use, then clear all the bits -@@ -1827,7 +1827,7 @@ imx_uart_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -1834,7 +1834,7 @@ imx_uart_set_termios(struct uart_port *port, struct ktermios *termios, if (UART_ENABLE_MS(&sport->port, termios->c_cflag)) imx_uart_enable_ms(&sport->port); @@ -4289,16 +6263,16 @@ index 349d4849b..3934ab8fe 100644 } static const char *imx_uart_type(struct uart_port *port) -@@ -1889,7 +1889,7 @@ static int imx_uart_poll_init(struct uart_port *port) +@@ -1896,7 +1896,7 @@ static int imx_uart_poll_init(struct uart_port *port) - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); - spin_lock_irqsave(&sport->port.lock, flags); + uart_port_lock_irqsave(&sport->port, &flags); /* * Be careful about the order of enabling bits here. First enable the -@@ -1917,7 +1917,7 @@ static int imx_uart_poll_init(struct uart_port *port) +@@ -1924,7 +1924,7 @@ static int imx_uart_poll_init(struct uart_port *port) imx_uart_writel(sport, ucr1 | UCR1_RRDYEN, UCR1); imx_uart_writel(sport, ucr2 | UCR2_ATEN, UCR2); @@ -4307,7 +6281,7 @@ index 349d4849b..3934ab8fe 100644 return 0; } -@@ -2037,9 +2037,9 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) +@@ -2044,9 +2044,9 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) if (sport->port.sysrq) locked = 0; else if (oops_in_progress) @@ -4319,7 +6293,7 @@ index 349d4849b..3934ab8fe 100644 /* * First, save UCR1/2/3 and then disable interrupts -@@ -2067,7 +2067,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) +@@ -2074,7 +2074,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) imx_uart_ucrs_restore(sport, &old_ucr); if (locked) @@ -4328,7 +6302,7 @@ index 349d4849b..3934ab8fe 100644 } /* -@@ -2225,10 +2225,10 @@ static enum hrtimer_restart imx_trigger_start_tx(struct hrtimer *t) +@@ -2232,10 +2232,10 @@ static enum hrtimer_restart imx_trigger_start_tx(struct hrtimer *t) struct imx_port *sport = container_of(t, struct imx_port, trigger_start_tx); unsigned long flags; @@ -4341,7 +6315,7 @@ index 349d4849b..3934ab8fe 100644 return HRTIMER_NORESTART; } -@@ -2238,10 +2238,10 @@ static enum hrtimer_restart imx_trigger_stop_tx(struct hrtimer *t) +@@ -2245,10 +2245,10 @@ static enum hrtimer_restart imx_trigger_stop_tx(struct hrtimer *t) struct imx_port *sport = container_of(t, struct imx_port, trigger_stop_tx); unsigned long flags; @@ -4354,7 +6328,7 @@ index 349d4849b..3934ab8fe 100644 return HRTIMER_NORESTART; } -@@ -2508,9 +2508,9 @@ static void 
imx_uart_restore_context(struct imx_port *sport) +@@ -2515,9 +2515,9 @@ static void imx_uart_restore_context(struct imx_port *sport) { unsigned long flags; @@ -4366,7 +6340,7 @@ index 349d4849b..3934ab8fe 100644 return; } -@@ -2525,7 +2525,7 @@ static void imx_uart_restore_context(struct imx_port *sport) +@@ -2532,7 +2532,7 @@ static void imx_uart_restore_context(struct imx_port *sport) imx_uart_writel(sport, sport->saved_reg[2], UCR3); imx_uart_writel(sport, sport->saved_reg[3], UCR4); sport->context_saved = false; @@ -4375,7 +6349,7 @@ index 349d4849b..3934ab8fe 100644 } static void imx_uart_save_context(struct imx_port *sport) -@@ -2533,7 +2533,7 @@ static void imx_uart_save_context(struct imx_port *sport) +@@ -2540,7 +2540,7 @@ static void imx_uart_save_context(struct imx_port *sport) unsigned long flags; /* Save necessary regs */ @@ -4384,7 +6358,7 @@ index 349d4849b..3934ab8fe 100644 sport->saved_reg[0] = imx_uart_readl(sport, UCR1); sport->saved_reg[1] = imx_uart_readl(sport, UCR2); sport->saved_reg[2] = imx_uart_readl(sport, UCR3); -@@ -2545,7 +2545,7 @@ static void imx_uart_save_context(struct imx_port *sport) +@@ -2552,7 +2552,7 @@ static void imx_uart_save_context(struct imx_port *sport) sport->saved_reg[8] = imx_uart_readl(sport, UBMR); sport->saved_reg[9] = imx_uart_readl(sport, IMX21_UTS); sport->context_saved = true; @@ -4393,8 +6367,49 @@ index 349d4849b..3934ab8fe 100644 } static void imx_uart_enable_wakeup(struct imx_port *sport, bool on) +-- +2.51.0 + +From fa9f6cf078de5b5ccbfa7112eb10d6bf224b602d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:47 +0206 +Subject: [PATCH 055/213] serial: ip22zilog: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
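
For reference, the wrappers this whole series converts to are, at this stage, thin veneers over uart_port::lock; the console synchronization logic only arrives in later steps. A minimal sketch of their shape (the canonical definitions live in include/linux/serial_core.h, provided by an earlier core patch in this series):

  static inline void uart_port_lock(struct uart_port *up)
  {
  	spin_lock(&up->lock);
  }

  static inline void uart_port_lock_irq(struct uart_port *up)
  {
  	spin_lock_irq(&up->lock);
  }

  static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
  {
  	spin_lock_irqsave(&up->lock, *flags);
  }

  static inline bool uart_port_trylock(struct uart_port *up)
  {
  	return spin_trylock(&up->lock);
  }

  static inline void uart_port_unlock(struct uart_port *up)
  {
  	spin_unlock(&up->lock);
  }

  static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
  {
  	spin_unlock_irqrestore(&up->lock, flags);
  }

Note that the *_irqsave variants take flags by pointer, unlike the spin_lock_irqsave() macro which takes an lvalue; that is the one signature change visible in every hunk of this series.
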
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-31-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/ip22zilog.c | 36 +++++++++++++++++----------------- + 1 file changed, 18 insertions(+), 18 deletions(-) + diff --git a/drivers/tty/serial/ip22zilog.c b/drivers/tty/serial/ip22zilog.c -index 845ff706b..320b29cd4 100644 +index 845ff706bc59..320b29cd4683 100644 --- a/drivers/tty/serial/ip22zilog.c +++ b/drivers/tty/serial/ip22zilog.c @@ -432,7 +432,7 @@ static irqreturn_t ip22zilog_interrupt(int irq, void *dev_id) @@ -4542,8 +6557,50 @@ index 845ff706b..320b29cd4 100644 if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); +-- +2.51.0 + +From bd90655fc366bf194ca970f66fd47f68e9a831d1 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:48 +0206 +Subject: [PATCH 056/213] serial: jsm: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-32-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/jsm/jsm_neo.c | 4 ++-- + drivers/tty/serial/jsm/jsm_tty.c | 16 ++++++++-------- + 2 files changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/jsm/jsm_neo.c b/drivers/tty/serial/jsm/jsm_neo.c -index 0c78f6627..2bd640428 100644 +index 0c78f66276cd..2bd640428970 100644 --- a/drivers/tty/serial/jsm/jsm_neo.c +++ b/drivers/tty/serial/jsm/jsm_neo.c @@ -816,9 +816,9 @@ static void neo_parse_isr(struct jsm_board *brd, u32 port) @@ -4559,7 +6616,7 @@ index 0c78f6627..2bd640428 100644 } diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c -index 222afc270..ce0fef7e2 100644 +index 1bee624bd484..be2f130696b3 100644 --- a/drivers/tty/serial/jsm/jsm_tty.c +++ b/drivers/tty/serial/jsm/jsm_tty.c @@ -152,14 +152,14 @@ static void jsm_tty_send_xchar(struct uart_port *port, char ch) @@ -4631,8 +6688,50 @@ index 222afc270..ce0fef7e2 100644 } static const char *jsm_tty_type(struct uart_port *port) +-- +2.51.0 + +From 2f11345f09064d2bdb7b5d4da2b1caa6b0cea2e6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:49 +0206 +Subject: [PATCH 057/213] serial: liteuart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
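
The conversions themselves are mechanical, which is why coccinelle could do them. A sketch of the before/after shape for a process-context path, with a hypothetical foo_set_termios() standing in for the driver functions touched here:

  /* before */
  static void foo_set_termios(struct uart_port *port)
  {
  	unsigned long flags;

  	spin_lock_irqsave(&port->lock, flags);
  	/* program baud rate, parity, FIFO thresholds ... */
  	spin_unlock_irqrestore(&port->lock, flags);
  }

  /* after: same locking rules, wrapper accessors, flags now passed by pointer */
  static void foo_set_termios(struct uart_port *port)
  {
  	unsigned long flags;

  	uart_port_lock_irqsave(port, &flags);
  	/* program baud rate, parity, FIFO thresholds ... */
  	uart_port_unlock_irqrestore(port, flags);
  }
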
+ +Signed-off-by: Thomas Gleixner +Acked-by: Gabriel Somlo +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-33-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/liteuart.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c -index d881cdd2a..a25ab1efe 100644 +index d881cdd2a58f..a25ab1efe38f 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -139,13 +139,13 @@ static irqreturn_t liteuart_interrupt(int irq, void *data) @@ -4704,8 +6803,49 @@ index d881cdd2a..a25ab1efe 100644 } static int liteuart_console_setup(struct console *co, char *options) +-- +2.51.0 + +From 9d802a2a54f456e62bff87df114bc871c7f5ff05 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:50 +0206 +Subject: [PATCH 058/213] serial: lpc32xx_hs: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-34-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/lpc32xx_hs.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c -index b38fe4728..5149a947b 100644 +index b38fe4728c26..5149a947b7fe 100644 --- a/drivers/tty/serial/lpc32xx_hs.c +++ b/drivers/tty/serial/lpc32xx_hs.c @@ -140,15 +140,15 @@ static void lpc32xx_hsuart_console_write(struct console *co, const char *s, @@ -4816,8 +6956,49 @@ index b38fe4728..5149a947b 100644 /* Don't rewrite B0 */ if (tty_termios_baud_rate(termios)) +-- +2.51.0 + +From 31266ef049e1679245e6c6ee98f53e4d53aa09d6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:51 +0206 +Subject: [PATCH 059/213] serial: ma35d1: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-35-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/ma35d1_serial.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c -index 99225f1e0..faccd772c 100644 +index 99225f1e02ac..faccd772c68c 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -269,16 +269,16 @@ static void receive_chars(struct uart_ma35d1_port *up) @@ -4897,8 +7078,49 @@ index 99225f1e0..faccd772c 100644 } static int __init ma35d1serial_console_setup(struct console *co, char *options) +-- +2.51.0 + +From 7b0f02b6d2cf8b645adb853b4ac8e17212abb130 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:52 +0206 +Subject: [PATCH 060/213] serial: mcf: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-36-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/mcf.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c -index aea29b4e6..ee40af20a 100644 +index aea29b4e6567..ee40af20a08f 100644 --- a/drivers/tty/serial/mcf.c +++ b/drivers/tty/serial/mcf.c @@ -135,12 +135,12 @@ static void mcf_break_ctl(struct uart_port *port, int break_state) @@ -4988,8 +7210,49 @@ index aea29b4e6..ee40af20a 100644 return ret; } +-- +2.51.0 + +From e2ca6db3a33364a4fcc241013bcd257799df8064 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:53 +0206 +Subject: [PATCH 061/213] serial: men_z135_uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-37-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/men_z135_uart.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c -index d2502aaa3..8048fa542 100644 +index d2502aaa3e8c..8048fa542fc4 100644 --- a/drivers/tty/serial/men_z135_uart.c +++ b/drivers/tty/serial/men_z135_uart.c @@ -392,7 +392,7 @@ static irqreturn_t men_z135_intr(int irq, void *data) @@ -5028,8 +7291,50 @@ index d2502aaa3..8048fa542 100644 } static const char *men_z135_type(struct uart_port *port) +-- +2.51.0 + +From 6e93217eed0996a78171a518e3b9dd6522b4fc3b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:54 +0206 +Subject: [PATCH 062/213] serial: meson: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Acked-by: Neil Armstrong +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-38-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/meson_uart.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c -index 9388b9dde..4c1d2089a 100644 +index 9388b9ddea3b..4c1d2089a0bb 100644 --- a/drivers/tty/serial/meson_uart.c +++ b/drivers/tty/serial/meson_uart.c @@ -129,14 +129,14 @@ static void meson_uart_shutdown(struct uart_port *port) @@ -5159,8 +7464,49 @@ index 9388b9dde..4c1d2089a 100644 local_irq_restore(flags); } +-- +2.51.0 + +From 059158ddfea44dd9200e0090f7bc3dad3d1fa417 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:55 +0206 +Subject: [PATCH 063/213] serial: milbeaut_usio: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
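
In the interrupt handlers converted throughout this series, the plain variants replace spin_lock()/spin_unlock(), since the hardirq path does not need to save and restore the interrupt state itself. A hypothetical handler, for illustration only:

  static irqreturn_t foo_uart_irq(int irq, void *dev_id)
  {
  	struct uart_port *port = dev_id;

  	uart_port_lock(port);		/* was: spin_lock(&port->lock) */
  	/* read the ISR, drain the RX FIFO, refill the TX FIFO ... */
  	uart_port_unlock(port);		/* was: spin_unlock(&port->lock) */

  	return IRQ_HANDLED;
  }
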
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-39-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/milbeaut_usio.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c -index 70a910085..db3b81f2a 100644 +index 9de3883a4e0b..cd789c99e5d1 100644 --- a/drivers/tty/serial/milbeaut_usio.c +++ b/drivers/tty/serial/milbeaut_usio.c @@ -207,9 +207,9 @@ static irqreturn_t mlb_usio_rx_irq(int irq, void *dev_id) @@ -5224,8 +7570,49 @@ index 70a910085..db3b81f2a 100644 } static const char *mlb_usio_type(struct uart_port *port) +-- +2.51.0 + +From 2106b5a5df89e66469abaacef3d6576691d368af Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:56 +0206 +Subject: [PATCH 064/213] serial: mpc52xx: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-40-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/mpc52xx_uart.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c -index 916507b8f..a252465e7 100644 +index 916507b8f31d..a252465e745f 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -1096,14 +1096,14 @@ static void @@ -5277,8 +7664,49 @@ index 916507b8f..a252465e7 100644 return ret; } +-- +2.51.0 + +From d57c0a933fcb7d28af6c166f69c97d6d5a9fcc2b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:57 +0206 +Subject: [PATCH 065/213] serial: mps2-uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-41-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/mps2-uart.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + diff --git a/drivers/tty/serial/mps2-uart.c b/drivers/tty/serial/mps2-uart.c -index ea5a7911c..2a4c09f3a 100644 +index ea5a7911cb15..2a4c09f3a834 100644 --- a/drivers/tty/serial/mps2-uart.c +++ b/drivers/tty/serial/mps2-uart.c @@ -188,12 +188,12 @@ static irqreturn_t mps2_uart_rxirq(int irq, void *data) @@ -5344,8 +7772,50 @@ index ea5a7911c..2a4c09f3a 100644 if (tty_termios_baud_rate(termios)) tty_termios_encode_baud_rate(termios, baud, baud); +-- +2.51.0 + +From da6d5996f7aedb0046338856ea2db66bab5cac23 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:58 +0206 +Subject: [PATCH 066/213] serial: msm: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Bjorn Andersson +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-42-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/msm_serial.c | 38 ++++++++++++++++----------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c -index 90953e679..597264b54 100644 +index 76b6429fb9e9..2dfc7bc80808 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -444,7 +444,7 @@ static void msm_complete_tx_dma(void *args) @@ -5492,8 +7962,49 @@ index 90953e679..597264b54 100644 local_irq_restore(flags); } +-- +2.51.0 + +From d64b0370c027904d1f5e8b5cbaac6418d0b78dd4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:43:59 +0206 +Subject: [PATCH 067/213] serial: mvebu-uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-43-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/mvebu-uart.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c -index ea924e9b9..0255646bc 100644 +index ea924e9b913b..0255646bc175 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -187,9 +187,9 @@ static unsigned int mvebu_uart_tx_empty(struct uart_port *port) @@ -5564,8 +8075,49 @@ index ea924e9b9..0255646bc 100644 } static int mvebu_uart_console_setup(struct console *co, char *options) +-- +2.51.0 + +From 39ba6cf5cac58f3cd284d499f44ecc3a7b90e4f3 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:00 +0206 +Subject: [PATCH 068/213] serial: omap: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-44-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/omap-serial.c | 38 ++++++++++++++++---------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c -index 135a838f5..1097fca22 100644 +index 135a838f517a..f4c6ff806465 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -390,10 +390,10 @@ static void serial_omap_throttle(struct uart_port *port) @@ -5687,35 +8239,70 @@ index 135a838f5..1097fca22 100644 dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->port.line); } -@@ -1212,13 +1212,10 @@ serial_omap_console_write(struct console *co, const char *s, - unsigned int ier; - int locked = 1; - -- local_irq_save(flags); -- if (up->port.sysrq) -- locked = 0; -- else if (oops_in_progress) +@@ -1216,9 +1216,9 @@ serial_omap_console_write(struct console *co, const char *s, + if (up->port.sysrq) + locked = 0; + else if (oops_in_progress) - locked = spin_trylock(&up->port.lock); -+ if (up->port.sysrq || oops_in_progress) -+ locked = uart_port_trylock_irqsave(&up->port, &flags); ++ locked = uart_port_trylock(&up->port); else - spin_lock(&up->port.lock); -+ uart_port_lock_irqsave(&up->port, &flags); ++ uart_port_lock(&up->port); /* * First save the IER then disable the interrupts -@@ -1245,8 +1242,7 @@ serial_omap_console_write(struct console *co, const char *s, +@@ -1245,7 +1245,7 @@ serial_omap_console_write(struct console *co, const char *s, check_modem_status(up); if (locked) - spin_unlock(&up->port.lock); -- local_irq_restore(flags); -+ uart_port_unlock_irqrestore(&up->port, flags); ++ uart_port_unlock(&up->port); + local_irq_restore(flags); } - static int __init +-- +2.51.0 + +From 511d308bff2ae3af2de41ae6852f7fb1f900626e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:01 +0206 +Subject: [PATCH 069/213] serial: owl: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-45-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/owl-uart.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c -index e99970a94..919f5e5aa 100644 +index e99970a9437f..919f5e5aa0f1 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -125,12 +125,12 @@ static unsigned int owl_uart_tx_empty(struct uart_port *port) @@ -5825,8 +8412,49 @@ index e99970a94..919f5e5aa 100644 local_irq_restore(flags); } +-- +2.51.0 + +From 567d7f0eece278a59c0cfc9326f579dfd3138f89 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:02 +0206 +Subject: [PATCH 070/213] serial: pch: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
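
The console ->write() callbacks keep their long-standing special case through the conversion: while an oops is in progress the port lock is only tried, never waited for, so a CPU that died holding it cannot deadlock the oops output, and during sysrq processing the lock is already held and must not be taken again. The converted shape, as visible in the imx, owl and pch hunks in this series:

  unsigned long flags;
  int locked = 1;

  if (port->sysrq)
  	locked = 0;
  else if (oops_in_progress)
  	locked = uart_port_trylock_irqsave(port, &flags);
  else
  	uart_port_lock_irqsave(port, &flags);

  /* emit the console string ... */

  if (locked)
  	uart_port_unlock_irqrestore(port, flags);

Drivers whose write path already runs with interrupts disabled use the plain uart_port_trylock()/uart_port_lock() pair instead, as the omap hunk above shows.
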
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-46-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/pch_uart.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c -index cc83b772b..436cc6d52 100644 +index 7b868ea48ad5..b6fda8cc992a 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -1347,7 +1347,7 @@ static void pch_uart_set_termios(struct uart_port *port, @@ -5869,8 +8497,49 @@ index cc83b772b..436cc6d52 100644 if (priv_locked) spin_unlock(&priv->lock); local_irq_restore(flags); +-- +2.51.0 + +From 08582892bc0a10f49720b34dbc194cd35db10843 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:03 +0206 +Subject: [PATCH 071/213] serial: pic32: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-47-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/pic32_uart.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c -index e308d5022..3a95bf5d5 100644 +index e308d5022b3f..3a95bf5d55d3 100644 --- a/drivers/tty/serial/pic32_uart.c +++ b/drivers/tty/serial/pic32_uart.c @@ -243,7 +243,7 @@ static void pic32_uart_break_ctl(struct uart_port *port, int ctl) @@ -5951,8 +8620,49 @@ index e308d5022..3a95bf5d5 100644 } /* serial core request to claim uart iomem */ +-- +2.51.0 + +From e9b2adc26c935c9b4d9f064cd9cdc1343809d89a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:04 +0206 +Subject: [PATCH 072/213] serial: pmac_zilog: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-48-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/pmac_zilog.c | 52 ++++++++++++++++----------------- + 1 file changed, 26 insertions(+), 26 deletions(-) + diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c -index 29bc80d39..77691fbbf 100644 +index 29bc80d39e8b..77691fbbf779 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -245,9 +245,9 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) @@ -6147,8 +8857,49 @@ index 29bc80d39..77691fbbf 100644 } /* +-- +2.51.0 + +From afa959966c9515f71dbe2291f9a6c24db07528e6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:05 +0206 +Subject: [PATCH 073/213] serial: pxa: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-49-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/pxa.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + diff --git a/drivers/tty/serial/pxa.c b/drivers/tty/serial/pxa.c -index 73c60f5ea..46e70e155 100644 +index 73c60f5ea027..46e70e155aab 100644 --- a/drivers/tty/serial/pxa.c +++ b/drivers/tty/serial/pxa.c @@ -225,14 +225,14 @@ static inline irqreturn_t serial_pxa_irq(int irq, void *dev_id) @@ -6261,8 +9012,50 @@ index 73c60f5ea..46e70e155 100644 local_irq_restore(flags); clk_disable(up->clk); +-- +2.51.0 + +From a891af24093dfdf7b7da07c77864f412f0c588e7 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:06 +0206 +Subject: [PATCH 074/213] serial: qcom-geni: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Bjorn Andersson +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-50-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/qcom_geni_serial.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c -index f820a09cb..a7db08431 100644 +index f820a09cb5c3..a7db08431ef3 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -492,9 +492,9 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s, @@ -6295,8 +9088,49 @@ index f820a09cb..a7db08431 100644 m_irq_status = readl(uport->membase + SE_GENI_M_IRQ_STATUS); s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS); +-- +2.51.0 + +From 67bc58fbfd72e2852807b24883d7f5a816c0a328 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:07 +0206 +Subject: [PATCH 075/213] serial: rda: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-51-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/rda-uart.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c -index be5c842b5..d824c8318 100644 +index be5c842b5ba9..d824c8318f33 100644 --- a/drivers/tty/serial/rda-uart.c +++ b/drivers/tty/serial/rda-uart.c @@ -139,12 +139,12 @@ static unsigned int rda_uart_tx_empty(struct uart_port *port) @@ -6436,8 +9270,49 @@ index be5c842b5..d824c8318 100644 local_irq_restore(flags); } +-- +2.51.0 + +From ae9872f6dd9b9e43699d888278203fbea538d2fc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:08 +0206 +Subject: [PATCH 076/213] serial: rp2: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-52-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/rp2.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c -index 5a1de6044..45be2b881 100644 +index 5a1de6044b38..45be2b88168e 100644 --- a/drivers/tty/serial/rp2.c +++ b/drivers/tty/serial/rp2.c @@ -276,9 +276,9 @@ static unsigned int rp2_uart_tx_empty(struct uart_port *port) @@ -6514,8 +9389,49 @@ index 5a1de6044..45be2b881 100644 } static const char *rp2_uart_type(struct uart_port *port) +-- +2.51.0 + +From b704b4278f62eb37251c31665f220a0a644ff293 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:09 +0206 +Subject: [PATCH 077/213] serial: sa1100: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-53-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sa1100.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c -index ad011f1e2..be7bcd75d 100644 +index ad011f1e2f4d..be7bcd75d9f4 100644 --- a/drivers/tty/serial/sa1100.c +++ b/drivers/tty/serial/sa1100.c @@ -115,9 +115,9 @@ static void sa1100_timeout(struct timer_list *t) @@ -6595,8 +9511,49 @@ index ad011f1e2..be7bcd75d 100644 } static const char *sa1100_type(struct uart_port *port) +-- +2.51.0 + +From a2cdc82b55822add72ea4c377a058fecc6828464 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:10 +0206 +Subject: [PATCH 078/213] serial: samsung_tty: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-54-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/samsung_tty.c | 50 ++++++++++++++++---------------- + 1 file changed, 25 insertions(+), 25 deletions(-) + diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c -index 5a4d88e13..a82b65155 100644 +index 5a4d88e13471..a82b65155f6e 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -248,7 +248,7 @@ static void s3c24xx_serial_rx_enable(struct uart_port *port) @@ -6804,8 +9761,49 @@ index 5a4d88e13..a82b65155 100644 } /* Shouldn't be __init, as it can be instantiated from other module */ +-- +2.51.0 + +From d08e7b6582e2f5e94c86c23b2fc510ecbac516f2 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:11 +0206 +Subject: [PATCH 079/213] serial: sb1250-duart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-55-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sb1250-duart.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c -index f3cd69346..dbec29d9a 100644 +index f3cd69346482..dbec29d9a6c3 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -610,7 +610,7 @@ static void sbd_set_termios(struct uart_port *uport, struct ktermios *termios, @@ -6853,9 +9851,114 @@ index f3cd69346..dbec29d9a 100644 } static int __init sbd_console_setup(struct console *co, char *options) -diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c -index d4ec943cb..6d4006b41 100644 ---- a/drivers/tty/serial/serial-tegra.c +-- +2.51.0 + +From 93552983f41ced030136af0e4c89ab2b61b31f4c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:12 +0206 +Subject: [PATCH 080/213] serial: sc16is7xx: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
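
Unlike the surrounding patches, the version of this one carried in the rt patch is not a pure 1:1 lock substitution: sc16is7xx drives its TX path from a kthread worker, and the hunk below also re-arms the TX-ready interrupt under the port lock once the worker has finished filling the FIFO, presumably because the 6.6.104 stable base this series was rebased onto moved the THRI arming around. The added lines reduce to:

  uart_port_lock_irqsave(port, &flags);
  sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT);
  uart_port_unlock_irqrestore(port, flags);
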
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-56-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sc16is7xx.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c +index 8a2ce2ca6b39..fd0f03965c11 100644 +--- a/drivers/tty/serial/sc16is7xx.c ++++ b/drivers/tty/serial/sc16is7xx.c +@@ -821,6 +821,7 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) + { + struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); ++ unsigned long flags; + + if ((port->rs485.flags & SER_RS485_ENABLED) && + (port->rs485.delay_rts_before_send > 0)) +@@ -829,6 +830,10 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) + mutex_lock(&one->efr_lock); + sc16is7xx_handle_tx(port); + mutex_unlock(&one->efr_lock); ++ ++ uart_port_lock_irqsave(port, &flags); ++ sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); ++ uart_port_unlock_irqrestore(port, flags); + } + + static void sc16is7xx_reconf_rs485(struct uart_port *port) +-- +2.51.0 + +From fb23d06ad644b0da8b27a374e736721fb3561042 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:13 +0206 +Subject: [PATCH 081/213] serial: tegra: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-57-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/serial-tegra.c | 32 +++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c +index d4ec943cb8e9..6d4006b41975 100644 +--- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -411,7 +411,7 @@ static int tegra_set_baudrate(struct tegra_uart_port *tup, unsigned int baud) divisor = DIV_ROUND_CLOSEST(rate, baud * 16); @@ -6993,8 +10096,50 @@ index d4ec943cb..6d4006b41 100644 } static const char *tegra_uart_type(struct uart_port *u) +-- +2.51.0 + +From 8cb79f516abeab2094a0a3b29013e131daef44b2 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:14 +0206 +Subject: [PATCH 082/213] serial: core: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
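+
+The core conversion below exercises most of the variants. Across the whole
+series the mapping is strictly one-to-one (illustrative summary):
+
+	spin_lock(&port->lock)                   ->  uart_port_lock(port)
+	spin_lock_irq(&port->lock)               ->  uart_port_lock_irq(port)
+	spin_lock_irqsave(&port->lock, flags)    ->  uart_port_lock_irqsave(port, &flags)
+	spin_trylock(&port->lock)                ->  uart_port_trylock(port)
+	spin_trylock_irqsave(&port->lock, flags) ->  uart_port_trylock_irqsave(port, &flags)
+
+The matching unlock calls convert the same way, which keeps the review of
+each driver patch a pure pattern match.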
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-58-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/serial_core.c | 92 ++++++++++++++++---------------- + drivers/tty/serial/serial_port.c | 4 +- + 2 files changed, 48 insertions(+), 48 deletions(-) + diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c -index 8ff0efac6..398104760 100644 +index 8ff0efac6aa0..398104760fbc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -79,7 +79,7 @@ static inline void uart_port_deref(struct uart_port *uport) @@ -7307,8 +10452,70 @@ index 8ff0efac6..398104760 100644 uart_rs485_config(port); +diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c +index a21c28707703..a871acc51c70 100644 +--- a/drivers/tty/serial/serial_port.c ++++ b/drivers/tty/serial/serial_port.c +@@ -38,14 +38,14 @@ static int serial_port_runtime_resume(struct device *dev) + goto out; + + /* Flush any pending TX for the port */ +- spin_lock_irqsave(&port->lock, flags); ++ uart_port_lock_irqsave(port, &flags); + if (!port_dev->tx_enabled) + goto unlock; + if (__serial_port_busy(port)) + port->ops->start_tx(port); + + unlock: +- spin_unlock_irqrestore(&port->lock, flags); ++ uart_port_unlock_irqrestore(port, flags); + + out: + pm_runtime_mark_last_busy(dev); +-- +2.51.0 + +From 9ea7732c36ac2800cd6f574c83200cc802c564cf Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:15 +0206 +Subject: [PATCH 083/213] serial: mctrl_gpio: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-59-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/serial_mctrl_gpio.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c -index 7d5aaa8d4..e51ca593a 100644 +index d5fb293dd5a9..e9e166045cc8 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -184,7 +184,7 @@ static irqreturn_t mctrl_gpio_irq_handle(int irq, void *context) @@ -7329,29 +10536,49 @@ index 7d5aaa8d4..e51ca593a 100644 return IRQ_HANDLED; } -diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c -index a21c28707..a871acc51 100644 ---- a/drivers/tty/serial/serial_port.c -+++ b/drivers/tty/serial/serial_port.c -@@ -38,14 +38,14 @@ static int serial_port_runtime_resume(struct device *dev) - goto out; - - /* Flush any pending TX for the port */ -- spin_lock_irqsave(&port->lock, flags); -+ uart_port_lock_irqsave(port, &flags); - if (!port_dev->tx_enabled) - goto unlock; - if (__serial_port_busy(port)) - port->ops->start_tx(port); - - unlock: -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - - out: - pm_runtime_mark_last_busy(dev); +-- +2.51.0 + +From b61f131067b8ea8e3fea48671eb3e7b6d4883efb Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:16 +0206 +Subject: [PATCH 084/213] serial: txx9: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-60-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/serial_txx9.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c -index be08fb6f7..eaa980722 100644 +index be08fb6f749c..eaa980722455 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -335,13 +335,13 @@ static irqreturn_t serial_txx9_interrupt(int irq, void *dev_id) @@ -7448,11 +10675,52 @@ index be08fb6f7..eaa980722 100644 } static void +-- +2.51.0 + +From af00d97852d82e8d25accd636d6574415f641c2e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:17 +0206 +Subject: [PATCH 085/213] serial: sh-sci: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
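+
+The console write path converted below is the case these wrappers
+ultimately serve: it may run while the lock holder has crashed, so it only
+try-locks during an oops and skips locking entirely when invoked under
+sysrq. As a minimal sketch in wrapper form (hypothetical example, not
+driver code; how the uart_port is obtained varies per driver, here it is
+assumed to sit in console::data):
+
+	static void example_console_write(struct console *co, const char *s,
+					  unsigned int count)
+	{
+		struct uart_port *port = co->data;	/* assumption, see above */
+		unsigned long flags;
+		int locked = 1;
+
+		if (port->sysrq)
+			locked = 0;	/* lock already held around sysrq handling */
+		else if (oops_in_progress)
+			locked = uart_port_trylock_irqsave(port, &flags);
+		else
+			uart_port_lock_irqsave(port, &flags);
+
+		/* mask TX interrupts, write out 's', restore the mask */
+
+		if (locked)
+			uart_port_unlock_irqrestore(port, flags);
+	}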
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-61-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sh-sci.c | 68 ++++++++++++++++++------------------- + 1 file changed, 34 insertions(+), 34 deletions(-) + diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c -index 4350a69d9..9f8e224e1 100644 +index dab5658d9d54..4e4ec569394f 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c -@@ -1206,7 +1206,7 @@ static void sci_dma_tx_complete(void *arg) +@@ -1227,7 +1227,7 @@ static void sci_dma_tx_complete(void *arg) dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); @@ -7461,7 +10729,7 @@ index 4350a69d9..9f8e224e1 100644 uart_xmit_advance(port, s->tx_dma_len); -@@ -1230,7 +1230,7 @@ static void sci_dma_tx_complete(void *arg) +@@ -1253,7 +1253,7 @@ static void sci_dma_tx_complete(void *arg) } } @@ -7470,7 +10738,7 @@ index 4350a69d9..9f8e224e1 100644 } /* Locking: called with port lock held */ -@@ -1326,7 +1326,7 @@ static void sci_dma_rx_complete(void *arg) +@@ -1349,7 +1349,7 @@ static void sci_dma_rx_complete(void *arg) dev_dbg(port->dev, "%s(%d) active cookie %d\n", __func__, port->line, s->active_rx); @@ -7479,7 +10747,7 @@ index 4350a69d9..9f8e224e1 100644 active = sci_dma_rx_find_active(s); if (active >= 0) -@@ -1353,20 +1353,20 @@ static void sci_dma_rx_complete(void *arg) +@@ -1376,20 +1376,20 @@ static void sci_dma_rx_complete(void *arg) dma_async_issue_pending(chan); @@ -7504,7 +10772,7 @@ index 4350a69d9..9f8e224e1 100644 } static void sci_dma_tx_release(struct sci_port *s) -@@ -1415,13 +1415,13 @@ static int sci_dma_rx_submit(struct sci_port *s, bool port_lock_held) +@@ -1438,13 +1438,13 @@ static int sci_dma_rx_submit(struct sci_port *s, bool port_lock_held) fail: /* Switch to PIO */ if (!port_lock_held) @@ -7520,7 +10788,7 @@ index 4350a69d9..9f8e224e1 100644 return -EAGAIN; } -@@ -1443,14 +1443,14 @@ static void sci_dma_tx_work_fn(struct work_struct *work) +@@ -1466,14 +1466,14 @@ static void sci_dma_tx_work_fn(struct work_struct *work) * transmit till the end, and then the rest. Take the port lock to get a * consistent xmit buffer state. 
*/ @@ -7537,7 +10805,7 @@ index 4350a69d9..9f8e224e1 100644 return; } -@@ -1458,7 +1458,7 @@ static void sci_dma_tx_work_fn(struct work_struct *work) +@@ -1481,7 +1481,7 @@ static void sci_dma_tx_work_fn(struct work_struct *work) DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { @@ -7546,7 +10814,7 @@ index 4350a69d9..9f8e224e1 100644 dev_warn(port->dev, "Failed preparing Tx DMA descriptor\n"); goto switch_to_pio; } -@@ -1470,12 +1470,12 @@ static void sci_dma_tx_work_fn(struct work_struct *work) +@@ -1493,12 +1493,12 @@ static void sci_dma_tx_work_fn(struct work_struct *work) desc->callback_param = s; s->cookie_tx = dmaengine_submit(desc); if (dma_submit_error(s->cookie_tx)) { @@ -7561,7 +10829,7 @@ index 4350a69d9..9f8e224e1 100644 dev_dbg(port->dev, "%s: %p: %d...%d, cookie %d\n", __func__, xmit->buf, tail, head, s->cookie_tx); -@@ -1483,10 +1483,10 @@ static void sci_dma_tx_work_fn(struct work_struct *work) +@@ -1506,10 +1506,10 @@ static void sci_dma_tx_work_fn(struct work_struct *work) return; switch_to_pio: @@ -7574,7 +10842,7 @@ index 4350a69d9..9f8e224e1 100644 return; } -@@ -1503,17 +1503,17 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) +@@ -1526,17 +1526,17 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) dev_dbg(port->dev, "DMA Rx timed out\n"); @@ -7595,7 +10863,7 @@ index 4350a69d9..9f8e224e1 100644 dev_dbg(port->dev, "Cookie %d #%d has already completed\n", s->active_rx, active); -@@ -1531,7 +1531,7 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) +@@ -1554,7 +1554,7 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) */ status = dmaengine_tx_status(s->chan_rx, s->active_rx, &state); if (status == DMA_COMPLETE) { @@ -7604,7 +10872,7 @@ index 4350a69d9..9f8e224e1 100644 dev_dbg(port->dev, "Transaction complete after DMA engine was stopped"); return HRTIMER_NORESTART; } -@@ -1552,7 +1552,7 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) +@@ -1575,7 +1575,7 @@ static enum hrtimer_restart sci_dma_rx_timer_fn(struct hrtimer *t) sci_dma_rx_reenable_irq(s); @@ -7613,7 +10881,7 @@ index 4350a69d9..9f8e224e1 100644 return HRTIMER_NORESTART; } -@@ -1776,9 +1776,9 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr) +@@ -1816,9 +1816,9 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr) struct uart_port *port = ptr; unsigned long flags; @@ -7625,7 +10893,7 @@ index 4350a69d9..9f8e224e1 100644 return IRQ_HANDLED; } -@@ -1792,11 +1792,11 @@ static irqreturn_t sci_tx_end_interrupt(int irq, void *ptr) +@@ -1832,11 +1832,11 @@ static irqreturn_t sci_tx_end_interrupt(int irq, void *ptr) if (port->type != PORT_SCI) return sci_tx_interrupt(irq, ptr); @@ -7639,7 +10907,7 @@ index 4350a69d9..9f8e224e1 100644 return IRQ_HANDLED; } -@@ -2193,7 +2193,7 @@ static void sci_break_ctl(struct uart_port *port, int break_state) +@@ -2239,7 +2239,7 @@ static void sci_break_ctl(struct uart_port *port, int break_state) return; } @@ -7648,7 +10916,7 @@ index 4350a69d9..9f8e224e1 100644 scsptr = serial_port_in(port, SCSPTR); scscr = serial_port_in(port, SCSCR); -@@ -2207,7 +2207,7 @@ static void sci_break_ctl(struct uart_port *port, int break_state) +@@ -2253,7 +2253,7 @@ static void sci_break_ctl(struct uart_port *port, int break_state) serial_port_out(port, SCSPTR, scsptr); serial_port_out(port, SCSCR, scscr); @@ -7657,16 +10925,16 @@ index 4350a69d9..9f8e224e1 100644 } static int sci_startup(struct uart_port *port) -@@ -2239,7 +2239,7 @@ static void sci_shutdown(struct 
uart_port *port) +@@ -2286,7 +2286,7 @@ static void sci_shutdown(struct uart_port *port) s->autorts = false; - mctrl_gpio_disable_ms(to_sci_port(port)->gpios); + mctrl_gpio_disable_ms_sync(to_sci_port(port)->gpios); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sci_stop_rx(port); sci_stop_tx(port); /* -@@ -2249,7 +2249,7 @@ static void sci_shutdown(struct uart_port *port) +@@ -2296,7 +2296,7 @@ static void sci_shutdown(struct uart_port *port) scr = serial_port_in(port, SCSCR); serial_port_out(port, SCSCR, scr & (SCSCR_CKE1 | SCSCR_CKE0 | s->hscif_tot)); @@ -7675,7 +10943,7 @@ index 4350a69d9..9f8e224e1 100644 #ifdef CONFIG_SERIAL_SH_SCI_DMA if (s->chan_rx_saved) { -@@ -2551,7 +2551,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2598,7 +2598,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, serial_port_out(port, SCCKS, sccks); } @@ -7684,7 +10952,7 @@ index 4350a69d9..9f8e224e1 100644 sci_reset(port); -@@ -2673,7 +2673,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2720,7 +2720,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, if ((termios->c_cflag & CREAD) != 0) sci_start_rx(port); @@ -7693,7 +10961,7 @@ index 4350a69d9..9f8e224e1 100644 sci_port_disable(s); -@@ -3058,9 +3058,9 @@ static void serial_console_write(struct console *co, const char *s, +@@ -3096,9 +3096,9 @@ static void serial_console_write(struct console *co, const char *s, if (port->sysrq) locked = 0; else if (oops_in_progress) @@ -7705,7 +10973,7 @@ index 4350a69d9..9f8e224e1 100644 /* first save SCSCR then disable interrupts, keep clock source */ ctrl = serial_port_in(port, SCSCR); -@@ -3080,7 +3080,7 @@ static void serial_console_write(struct console *co, const char *s, +@@ -3118,7 +3118,7 @@ static void serial_console_write(struct console *co, const char *s, serial_port_out(port, SCSCR, ctrl); if (locked) @@ -7714,21 +10982,52 @@ index 4350a69d9..9f8e224e1 100644 } static int serial_console_setup(struct console *co, char *options) +-- +2.51.0 + +From 0c549dde41e045473cdc3f436404dd1b478a53dc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:18 +0206 +Subject: [PATCH 086/213] serial: sifive: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-62-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sifive.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c -index d195c5de5..d420bed4a 100644 +index e86b00873d0e..fea6b999ba0e 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c -@@ -412,7 +412,8 @@ static void __ssp_receive_chars(struct sifive_serial_port *ssp) - break; - - ssp->port.icount.rx++; -- uart_insert_char(&ssp->port, 0, 0, ch, TTY_NORMAL); -+ if (!uart_prepare_sysrq_char(&ssp->port, ch)) -+ uart_insert_char(&ssp->port, 0, 0, ch, TTY_NORMAL); - } - - tty_flip_buffer_push(&ssp->port.state->port); -@@ -521,11 +522,11 @@ static irqreturn_t sifive_serial_irq(int irq, void *dev_id) +@@ -521,11 +521,11 @@ static irqreturn_t sifive_serial_irq(int irq, void *dev_id) struct sifive_serial_port *ssp = dev_id; u32 ip; @@ -7742,16 +11041,16 @@ index d195c5de5..d420bed4a 100644 return IRQ_NONE; } -@@ -534,7 +535,7 @@ static irqreturn_t sifive_serial_irq(int irq, void *dev_id) +@@ -534,7 +534,7 @@ static irqreturn_t sifive_serial_irq(int irq, void *dev_id) if (ip & SIFIVE_SERIAL_IP_TXWM_MASK) __ssp_transmit_chars(ssp); - spin_unlock(&ssp->port.lock); -+ uart_unlock_and_check_sysrq(&ssp->port); ++ uart_port_unlock(&ssp->port); return IRQ_HANDLED; } -@@ -653,7 +654,7 @@ static void sifive_serial_set_termios(struct uart_port *port, +@@ -659,7 +659,7 @@ static void sifive_serial_set_termios(struct uart_port *port, ssp->port.uartclk / 16); __ssp_update_baud_rate(ssp, rate); @@ -7760,7 +11059,7 @@ index d195c5de5..d420bed4a 100644 /* Update the per-port timeout */ uart_update_timeout(port, termios->c_cflag, rate); -@@ -670,7 +671,7 @@ static void sifive_serial_set_termios(struct uart_port *port, +@@ -676,7 +676,7 @@ static void sifive_serial_set_termios(struct uart_port *port, if (v != old_v) __ssp_writel(v, SIFIVE_SERIAL_RXCTRL_OFFS, ssp); @@ -7769,35 +11068,70 @@ index d195c5de5..d420bed4a 100644 } static void sifive_serial_release_port(struct uart_port *port) -@@ -791,13 +792,10 @@ static void sifive_serial_console_write(struct console *co, const char *s, - if (!ssp) - return; - -- local_irq_save(flags); -- if (ssp->port.sysrq) -- locked = 0; -- else if (oops_in_progress) +@@ -801,9 +801,9 @@ static void sifive_serial_console_write(struct console *co, const char *s, + if (ssp->port.sysrq) + locked = 0; + else if (oops_in_progress) - locked = spin_trylock(&ssp->port.lock); -+ if (oops_in_progress) -+ locked = uart_port_trylock_irqsave(&ssp->port, &flags); ++ locked = uart_port_trylock(&ssp->port); else - spin_lock(&ssp->port.lock); -+ uart_port_lock_irqsave(&ssp->port, &flags); ++ uart_port_lock(&ssp->port); ier = __ssp_readl(ssp, SIFIVE_SERIAL_IE_OFFS); __ssp_writel(0, SIFIVE_SERIAL_IE_OFFS, ssp); -@@ -807,8 +805,7 @@ static void sifive_serial_console_write(struct console *co, const char *s, +@@ -813,7 +813,7 @@ static void sifive_serial_console_write(struct console *co, const char *s, __ssp_writel(ier, SIFIVE_SERIAL_IE_OFFS, ssp); if (locked) - spin_unlock(&ssp->port.lock); -- local_irq_restore(flags); -+ uart_port_unlock_irqrestore(&ssp->port, flags); ++ uart_port_unlock(&ssp->port); + local_irq_restore(flags); } - static int sifive_serial_console_setup(struct console *co, char *options) +-- +2.51.0 + +From 
f9fd486e89a7e3105d3915fce918dbae1733e83d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:19 +0206 +Subject: [PATCH 087/213] serial: sprd: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-63-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sprd_serial.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c -index f328fa572..f257525f9 100644 +index f328fa57231f..f257525f9299 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -247,7 +247,7 @@ static void sprd_complete_tx_dma(void *data) @@ -7922,8 +11256,49 @@ index f328fa572..f257525f9 100644 } static int sprd_console_setup(struct console *co, char *options) +-- +2.51.0 + +From e9bdea49db76075b12195801dfcb7aa00bd00342 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:20 +0206 +Subject: [PATCH 088/213] serial: st-asc: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. 
No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-64-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/st-asc.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c -index 92b9f6894..a821f5d76 100644 +index 92b9f6894006..a821f5d76a26 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -319,7 +319,7 @@ static irqreturn_t asc_interrupt(int irq, void *ptr) @@ -7996,8 +11371,49 @@ index 92b9f6894..a821f5d76 100644 } static int asc_console_setup(struct console *co, char *options) +-- +2.51.0 + +From f22563f1eb462897582ef5e6a78a5f727bbb08ac Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:21 +0206 +Subject: [PATCH 089/213] serial: stm32: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-65-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/stm32-usart.c | 38 ++++++++++++++++---------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c -index 9ef90bb30..b963f9ccb 100644 +index b58422ae156c..0eebd06faad0 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -535,7 +535,7 @@ static void stm32_usart_rx_dma_complete(void *arg) @@ -8145,8 +11561,49 @@ index 9ef90bb30..b963f9ccb 100644 /* Poll data from DMA RX buffer if any */ if (!stm32_usart_rx_dma_pause(stm32_port)) size += stm32_usart_receive_chars(port, true); +-- +2.51.0 + +From 6e6178923e471c2fb5ebd28b44835088569e9323 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:22 +0206 +Subject: [PATCH 090/213] serial: sunhv: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-66-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sunhv.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c -index c671d674b..5bfc0040f 100644 +index c671d674bce4..5bfc0040f17b 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -217,10 +217,10 @@ static irqreturn_t sunhv_interrupt(int irq, void *dev_id) @@ -8258,8 +11715,49 @@ index c671d674b..5bfc0040f 100644 } static struct console sunhv_console = { +-- +2.51.0 + +From 426fb932284431e17c3a587bf394a34fc6a62e9b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:23 +0206 +Subject: [PATCH 091/213] serial: sunplus-uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
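+
+Worth noting for this driver: sunplus_console_write() manages interrupt
+state itself with local_irq_save()/local_irq_restore(), so the mechanical
+conversion selects the plain (non-irqsave) wrapper variants, abridged from
+the hunks below:
+
+	local_irq_save(flags);
+	...
+	if (oops_in_progress)
+		locked = uart_port_trylock(&sunplus_console_port->port);
+	else
+		uart_port_lock(&sunplus_console_port->port);
+	...
+	if (locked)
+		uart_port_unlock(&sunplus_console_port->port);
+
+	local_irq_restore(flags);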
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-67-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sunplus-uart.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + diff --git a/drivers/tty/serial/sunplus-uart.c b/drivers/tty/serial/sunplus-uart.c -index 3aacd5eb4..4251f4e1b 100644 +index 3aacd5eb414c..4251f4e1ba99 100644 --- a/drivers/tty/serial/sunplus-uart.c +++ b/drivers/tty/serial/sunplus-uart.c @@ -184,7 +184,7 @@ static void sunplus_break_ctl(struct uart_port *port, int ctl) @@ -8368,8 +11866,49 @@ index 3aacd5eb4..4251f4e1b 100644 local_irq_restore(flags); } +-- +2.51.0 + +From 001f19172bfdd9b9fb57e5692458ddefa5177b9a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:24 +0206 +Subject: [PATCH 092/213] serial: sunsab: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-68-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sunsab.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c -index 40eeaf835..6aa51a6f8 100644 +index 40eeaf835bba..6aa51a6f8063 100644 --- a/drivers/tty/serial/sunsab.c +++ b/drivers/tty/serial/sunsab.c @@ -310,7 +310,7 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id) @@ -8508,8 +12047,49 @@ index 40eeaf835..6aa51a6f8 100644 return 0; } +-- +2.51.0 + +From 68e93b850bb511f7dd90cdd3fec173a8abe76112 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:25 +0206 +Subject: [PATCH 093/213] serial: sunsu: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-69-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sunsu.c | 46 +++++++++++++++++++------------------- + 1 file changed, 23 insertions(+), 23 deletions(-) + diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c -index 58a4342ad..1e051cc25 100644 +index 58a4342ad0f9..1e051cc2591c 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -212,9 +212,9 @@ static void enable_rsa(struct uart_sunsu_port *up) @@ -8691,8 +12271,49 @@ index 58a4342ad..1e051cc25 100644 } /* +-- +2.51.0 + +From 158184c45a5228fb69fc3c065b1d03815f20ca58 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:26 +0206 +Subject: [PATCH 094/213] serial: sunzilog: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-70-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/sunzilog.c | 42 +++++++++++++++++------------------ + 1 file changed, 21 insertions(+), 21 deletions(-) + diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c -index c8c71c562..d3b5e864b 100644 +index c8c71c56264c..d3b5e864b727 100644 --- a/drivers/tty/serial/sunzilog.c +++ b/drivers/tty/serial/sunzilog.c @@ -531,7 +531,7 @@ static irqreturn_t sunzilog_interrupt(int irq, void *dev_id) @@ -8866,8 +12487,49 @@ index c8c71c562..d3b5e864b 100644 #ifdef CONFIG_SERIO if (up->flags & (SUNZILOG_FLAG_CONS_KEYB | +-- +2.51.0 + +From 7efc7105108b812a0b1172342c68da4acfd4fef1 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:27 +0206 +Subject: [PATCH 095/213] serial: timbuart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-71-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/timbuart.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c -index 0859394a7..0cc6524f5 100644 +index 0859394a78cd..0cc6524f5e8b 100644 --- a/drivers/tty/serial/timbuart.c +++ b/drivers/tty/serial/timbuart.c @@ -174,7 +174,7 @@ static void timbuart_tasklet(struct tasklet_struct *t) @@ -8901,8 +12563,49 @@ index 0859394a7..0cc6524f5 100644 } static const char *timbuart_type(struct uart_port *port) +-- +2.51.0 + +From 67fceae6cda6ae4ee17e5f91257e0c78c598cf87 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:28 +0206 +Subject: [PATCH 096/213] serial: uartlite: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. 
The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-72-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/uartlite.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c -index b225a78f6..404c14aca 100644 +index 9f39bafa7fa9..9ffe82235af7 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -216,11 +216,11 @@ static irqreturn_t ulite_isr(int irq, void *dev_id) @@ -8970,8 +12673,50 @@ index b225a78f6..404c14aca 100644 } static int ulite_console_setup(struct console *co, char *options) +-- +2.51.0 + +From eee55e5fbbbea06bf78c05de69da6a1cc046c2cc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:29 +0206 +Subject: [PATCH 097/213] serial: ucc_uart: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. 
+ +Signed-off-by: Thomas Gleixner +Acked-by: Timur Tabi +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-73-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/ucc_uart.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c -index b06661b80..ed7a6bb55 100644 +index b06661b80f41..ed7a6bb5596a 100644 --- a/drivers/tty/serial/ucc_uart.c +++ b/drivers/tty/serial/ucc_uart.c @@ -931,7 +931,7 @@ static void qe_uart_set_termios(struct uart_port *port, @@ -8992,8 +12737,49 @@ index b06661b80..ed7a6bb55 100644 } /* +-- +2.51.0 + +From 3a32b43c648952d2daff1e1948ae01496ae389e4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:30 +0206 +Subject: [PATCH 098/213] serial: vt8500: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-74-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/vt8500_serial.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c -index c5d5c2765..78a1c1eea 100644 +index c5d5c2765119..78a1c1eea11b 100644 --- a/drivers/tty/serial/vt8500_serial.c +++ b/drivers/tty/serial/vt8500_serial.c @@ -227,7 +227,7 @@ static irqreturn_t vt8500_irq(int irq, void *dev_id) @@ -9032,8 +12818,49 @@ index c5d5c2765..78a1c1eea 100644 } static const char *vt8500_type(struct uart_port *port) +-- +2.51.0 + +From 0f455b8eb535d0304664de3e7c77d83a19b8df35 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 14 Sep 2023 20:44:31 +0206 +Subject: [PATCH 099/213] serial: xilinx_uartps: Use port lock wrappers + +When a serial port is used for kernel console output, then all +modifications to the UART registers which are done from other contexts, +e.g. getty, termios, are interference points for the kernel console. + +So far this has been ignored and the printk output is based on the +principle of hope. The rework of the console infrastructure which aims to +support threaded and atomic consoles, requires to mark sections which +modify the UART registers as unsafe. 
This allows the atomic write function +to make informed decisions and eventually to restore operational state. It +also allows to prevent the regular UART code from modifying UART registers +while printk output is in progress. + +All modifications of UART registers are guarded by the UART port lock, +which provides an obvious synchronization point with the console +infrastructure. + +To avoid adding this functionality to all UART drivers, wrap the +spin_[un]lock*() invocations for uart_port::lock into helper functions +which just contain the spin_[un]lock*() invocations for now. In a +subsequent step these helpers will gain the console synchronization +mechanisms. + +Converted with coccinelle. No functional change. + +Signed-off-by: Thomas Gleixner +Signed-off-by: John Ogness +Link: https://lore.kernel.org/r/20230914183831.587273-75-john.ogness@linutronix.de +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/xilinx_uartps.c | 52 +++++++++++++++--------------- + 1 file changed, 26 insertions(+), 26 deletions(-) + diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c -index 7f83d2780..84652f530 100644 +index 7f83d2780017..84652f530136 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -346,7 +346,7 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id) @@ -9260,96 +13087,61 @@ index 7f83d2780..84652f530 100644 } return uart_resume_port(cdns_uart->cdns_uart_driver, port); -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c -index 117abcf36..03e2eaf24 100644 ---- a/drivers/tty/tty_io.c -+++ b/drivers/tty/tty_io.c -@@ -3543,8 +3543,15 @@ static ssize_t show_cons_active(struct device *dev, - for_each_console(c) { - if (!c->device) - continue; -- if (!c->write) -- continue; -+ if (c->flags & CON_NBCON) { -+ if (!c->write_atomic && -+ !(c->write_thread && c->kthread)) { -+ continue; -+ } -+ } else { -+ if (!c->write) -+ continue; -+ } - if ((c->flags & CON_ENABLED) == 0) - continue; - cs[i++] = c; -diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c -index e0758fe79..270367654 100644 ---- a/fs/proc/consoles.c -+++ b/fs/proc/consoles.c -@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v) - { CON_ENABLED, 'E' }, - { CON_CONSDEV, 'C' }, - { CON_BOOT, 'B' }, -+ { CON_NBCON, 'N' }, - { CON_PRINTBUFFER, 'p' }, - { CON_BRL, 'b' }, - { CON_ANYTIME, 'a' }, - }; - char flags[ARRAY_SIZE(con_flags) + 1]; - struct console *con = v; -+ char con_write = '-'; - unsigned int a; - dev_t dev = 0; - -@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v) - seq_setwidth(m, 21 - 1); - seq_printf(m, "%s%d", con->name, con->index); - seq_pad(m, ' '); -- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', -- con->write ? 'W' : '-', con->unblank ? 'U' : '-', -- flags); -+ if (con->flags & CON_NBCON) { -+ if (con->write_atomic || con->write_thread) -+ con_write = 'W'; -+ } else { -+ if (con->write) -+ con_write = 'W'; -+ } -+ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write, -+ con->unblank ? 
'U' : '-', flags); - if (dev) - seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); - -diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h -index fc53e0ad5..448bbef47 100644 ---- a/include/linux/bottom_half.h -+++ b/include/linux/bottom_half.h -@@ -35,8 +35,10 @@ static inline void local_bh_enable(void) - - #ifdef CONFIG_PREEMPT_RT - extern bool local_bh_blocked(void); -+extern void softirq_preempt(void); - #else - static inline bool local_bh_blocked(void) { return false; } -+static inline void softirq_preempt(void) { } - #endif - - #endif /* _LINUX_BH_H */ +-- +2.51.0 + +From 51e74fdb0f41c444346c4ff6c4c9221bd5e90ae8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:00 +0206 +Subject: [PATCH 100/213] printk: Add non-BKL (nbcon) console basic + infrastructure + +The current console/printk subsystem is protected by a Big Kernel Lock, +(aka console_lock) which has ill defined semantics and is more or less +stateless. This puts severe limitations on the console subsystem and +makes forced takeover and output in emergency and panic situations a +fragile endeavour that is based on try and pray. + +The goal of non-BKL (nbcon) consoles is to break out of the console lock +jail and to provide a new infrastructure that avoids the pitfalls and +also allows console drivers to be gradually converted over. + +The proposed infrastructure aims for the following properties: + + - Per console locking instead of global locking + - Per console state that allows to make informed decisions + - Stateful handover and takeover + +As a first step, state is added to struct console. The per console state +is an atomic_t using a 32bit bit field. + +Reserve state bits, which will be populated later in the series. Wire +it up into the console register/unregister functionality. + +It was decided to use a bitfield because using a plain u32 with +mask/shift operations resulted in uncomprehensible code. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-2-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 31 ++++++++++++++++++ + kernel/printk/Makefile | 2 +- + kernel/printk/internal.h | 8 +++++ + kernel/printk/nbcon.c | 70 ++++++++++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 13 ++++++-- + 5 files changed, 120 insertions(+), 4 deletions(-) + create mode 100644 kernel/printk/nbcon.c + diff --git a/include/linux/console.h b/include/linux/console.h -index 7de11c763..1eb9580e9 100644 +index 38571607065d..bd962f178c8d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h -@@ -16,7 +16,9 @@ - - #include - #include -+#include - #include -+#include - #include - - struct vc_data; -@@ -156,6 +158,8 @@ static inline int con_debug_leave(void) +@@ -161,6 +161,8 @@ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. 
@@ -9358,40 +13150,24 @@ index 7de11c763..1eb9580e9 100644 */ enum cons_flags { CON_PRINTBUFFER = BIT(0), -@@ -166,6 +170,111 @@ enum cons_flags { +@@ -171,8 +173,32 @@ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), -+}; -+ + }; + +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * -+ * @req_prio: The priority of a handover request -+ * @prio: The priority of the current owner -+ * @unsafe: Console is busy in a non takeover region -+ * @unsafe_takeover: A hostile takeover in an unsafe state happened in the -+ * past. The console cannot be safe until re-initialized. -+ * @cpu: The CPU on which the owner runs -+ * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. -+ * -+ * The @prio and @req_prio fields are particularly important to allow -+ * spin-waiting to timeout and give up without the risk of a waiter being -+ * assigned the lock after giving up. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { -+ unsigned int prio : 2; -+ unsigned int req_prio : 2; -+ unsigned int unsafe : 1; -+ unsigned int unsafe_takeover : 1; -+ unsigned int cpu : 24; + }; + }; +}; @@ -9404,2355 +13180,6216 @@ index 7de11c763..1eb9580e9 100644 + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + -+/** -+ * nbcon_prio - console owner priority for nbcon consoles -+ * @NBCON_PRIO_NONE: Unused -+ * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage -+ * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) -+ * @NBCON_PRIO_PANIC: Panic output -+ * @NBCON_PRIO_MAX: The number of priority levels -+ * -+ * A higher priority context can takeover the console when it is -+ * in the safe state. The final attempt to flush consoles in panic() -+ * can be allowed to do so even in an unsafe state (Hope and pray). -+ */ -+enum nbcon_prio { -+ NBCON_PRIO_NONE = 0, -+ NBCON_PRIO_NORMAL, -+ NBCON_PRIO_EMERGENCY, -+ NBCON_PRIO_PANIC, -+ NBCON_PRIO_MAX, -+}; -+ -+struct console; -+struct printk_buffers; -+ -+/** -+ * struct nbcon_context - Context for console acquire/release -+ * @console: The associated console -+ * @spinwait_max_us: Limit for spin-wait acquire -+ * @prio: Priority of the context -+ * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can -+ * be used only with NBCON_PRIO_PANIC @prio. It -+ * might cause a system freeze when the console -+ * is used later. 
-+ * @backlog: Ringbuffer has pending records -+ * @pbufs: Pointer to the text buffer for this context -+ * @seq: The sequence number to print for this context -+ */ -+struct nbcon_context { -+ /* members set by caller */ -+ struct console *console; -+ unsigned int spinwait_max_us; -+ enum nbcon_prio prio; -+ unsigned int allow_unsafe_takeover : 1; -+ -+ /* members set by emit */ -+ unsigned int backlog : 1; -+ -+ /* members set by acquire */ -+ struct printk_buffers *pbufs; -+ u64 seq; -+}; -+ -+/** -+ * struct nbcon_write_context - Context handed to the nbcon write callbacks -+ * @ctxt: The core console context -+ * @outbuf: Pointer to the text buffer for output -+ * @len: Length to write -+ * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred -+ */ -+struct nbcon_write_context { -+ struct nbcon_context __private ctxt; -+ char *outbuf; -+ unsigned int len; -+ bool unsafe_takeover; - }; - /** -@@ -187,6 +296,17 @@ enum cons_flags { + * struct console - The console descriptor structure + * @name: The name of the console driver +@@ -192,6 +218,8 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * -+ * @write_atomic: Write callback for atomic context -+ * @write_thread: Write callback for non-atomic context -+ * @driver_enter: Callback to begin synchronization with driver code -+ * @driver_exit: Callback to finish synchronization with driver code + * @nbcon_state: State for nbcon consoles -+ * @nbcon_seq: Sequence number of the next record for nbcon to print -+ * @pbufs: Pointer to nbcon private buffer -+ * @kthread: Printer kthread for this console -+ * @rcuwait: RCU-safe wait object for @kthread waking -+ * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; -@@ -206,6 +326,20 @@ struct console { +@@ -211,6 +239,9 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ -+ bool (*write_atomic)(struct console *con, -+ struct nbcon_write_context *wctxt); -+ bool (*write_thread)(struct console *con, -+ struct nbcon_write_context *wctxt); -+ void (*driver_enter)(struct console *con, unsigned long *flags); -+ void (*driver_exit)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; -+ atomic_long_t __private nbcon_seq; -+ struct printk_buffers *pbufs; -+ struct task_struct *kthread; -+ struct rcuwait rcuwait; -+ struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP -@@ -332,6 +466,22 @@ static inline bool console_is_registered(const struct console *con) - lockdep_assert_console_list_lock_held(); \ - hlist_for_each_entry(con, &console_list, node) - -+#ifdef CONFIG_PRINTK -+extern void nbcon_cpu_emergency_enter(void); -+extern void nbcon_cpu_emergency_exit(void); -+extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); -+extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); -+extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); -+extern void nbcon_reacquire(struct nbcon_write_context *wctxt); -+#else -+static inline void nbcon_cpu_emergency_enter(void) { } -+static inline void nbcon_cpu_emergency_exit(void) { } -+static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } -+static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } -+static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } -+static inline void 
nbcon_reacquire(struct nbcon_write_context *wctxt) { } -+#endif -+ - extern int console_set_on_cmdline; - extern struct console *early_console; +diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile +index f5b388e810b9..39a2b61c7232 100644 +--- a/kernel/printk/Makefile ++++ b/kernel/printk/Makefile +@@ -1,6 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-y = printk.o +-obj-$(CONFIG_PRINTK) += printk_safe.o ++obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o + obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o + obj-$(CONFIG_PRINTK_INDEX) += index.o -diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h -index d95ab85f9..8b3ab0cc1 100644 ---- a/include/linux/entry-common.h -+++ b/include/linux/entry-common.h -@@ -60,7 +60,7 @@ - #define EXIT_TO_USER_MODE_WORK \ - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ -- ARCH_EXIT_TO_USER_MODE_WORK) -+ _TIF_NEED_RESCHED_LAZY | ARCH_EXIT_TO_USER_MODE_WORK) +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 7d4979d5c3ce..2ca0ab78802c 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -3,6 +3,7 @@ + * internal.h - printk internal definitions + */ + #include ++#include - /** - * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs -diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h -index 6813171af..674a622c9 100644 ---- a/include/linux/entry-kvm.h -+++ b/include/linux/entry-kvm.h -@@ -18,7 +18,7 @@ + #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) + void __init printk_sysctl_init(void); +@@ -61,6 +62,10 @@ void defer_console_output(void); - #define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ -- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) -+ _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED_LAZY | ARCH_XFER_TO_GUEST_MODE_WORK) + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); ++ ++void nbcon_init(struct console *con); ++void nbcon_cleanup(struct console *con); ++ + #else - struct kvm_vcpu; + #define PRINTK_PREFIX_MAX 0 +@@ -76,6 +81,9 @@ u16 printk_parse_prefix(const char *text, int *level, + #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) -diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index 4a1dc88dd..a5091ac97 100644 ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -609,6 +609,35 @@ extern void __raise_softirq_irqoff(unsigned int nr); - extern void raise_softirq_irqoff(unsigned int nr); - extern void raise_softirq(unsigned int nr); + static inline bool printk_percpu_data_ready(void) { return false; } ++static inline void nbcon_init(struct console *con) { } ++static inline void nbcon_cleanup(struct console *con) { } ++ + #endif /* CONFIG_PRINTK */ -+#ifdef CONFIG_PREEMPT_RT -+DECLARE_PER_CPU(struct task_struct *, timersd); -+DECLARE_PER_CPU(unsigned long, pending_timer_softirq); + /** +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +new file mode 100644 +index 000000000000..63d24ca62ac5 +--- /dev/null ++++ b/kernel/printk/nbcon.c +@@ -0,0 +1,70 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++// Copyright (C) 2022 Linutronix GmbH, John Ogness ++// Copyright (C) 2022 Intel, Thomas Gleixner + -+extern void raise_timer_softirq(void); -+extern void raise_hrtimer_softirq(void); ++#include ++#include ++#include "internal.h" ++/* ++ * Printk console printing implementation for consoles which does not depend ++ * 
on the legacy style console_lock mechanism. ++ */ + -+static inline unsigned int local_pending_timers(void) ++/** ++ * nbcon_state_set - Helper function to set the console state ++ * @con: Console to update ++ * @new: The new state to write ++ * ++ * Only to be used when the console is not yet or no longer visible in the ++ * system. Otherwise use nbcon_state_try_cmpxchg(). ++ */ ++static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) +{ -+ return __this_cpu_read(pending_timer_softirq); ++ atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); +} + -+#else -+static inline void raise_timer_softirq(void) ++/** ++ * nbcon_state_read - Helper function to read the console state ++ * @con: Console to read ++ * @state: The state to store the result ++ */ ++static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) +{ -+ raise_softirq(TIMER_SOFTIRQ); ++ state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); +} + -+static inline void raise_hrtimer_softirq(void) ++/** ++ * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state ++ * @con: Console to update ++ * @cur: Old/expected state ++ * @new: New state ++ * ++ * Return: True on success. False on fail and @cur is updated. ++ */ ++static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, ++ struct nbcon_state *new) +{ -+ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); +} + -+static inline unsigned int local_pending_timers(void) ++/** ++ * nbcon_init - Initialize the nbcon console specific data ++ * @con: Console to initialize ++ */ ++void nbcon_init(struct console *con) +{ -+ return local_softirq_pending(); ++ struct nbcon_state state = { }; ++ ++ nbcon_state_set(con, &state); +} -+#endif + - DECLARE_PER_CPU(struct task_struct *, ksoftirqd); ++/** ++ * nbcon_cleanup - Cleanup the nbcon console specific data ++ * @con: Console to cleanup ++ */ ++void nbcon_cleanup(struct console *con) ++{ ++ struct nbcon_state state = { }; ++ ++ nbcon_state_set(con, &state); ++} +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 51c43e0f9b29..0bcffaac59c2 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3385,9 +3385,10 @@ static void try_enable_default_console(struct console *newcon) + newcon->flags |= CON_CONSDEV; + } - static inline struct task_struct *this_cpu_ksoftirqd(void) -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index 337a9d1c5..531428066 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -3266,7 +3266,11 @@ struct softnet_data { - int defer_count; - int defer_ipi_scheduled; - struct sk_buff *defer_list; -+#ifndef CONFIG_PREEMPT_RT - call_single_data_t defer_csd; -+#else -+ struct work_struct defer_work; -+#endif - }; +-#define con_printk(lvl, con, fmt, ...) \ +- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ +- (con->flags & CON_BOOT) ? "boot" : "", \ ++#define con_printk(lvl, con, fmt, ...) \ ++ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ ++ (con->flags & CON_NBCON) ? "" : "legacy ", \ ++ (con->flags & CON_BOOT) ? 
"boot" : "", \ + con->name, con->index, ##__VA_ARGS__) - static inline void input_queue_head_incr(struct softnet_data *sd) -diff --git a/include/linux/printk.h b/include/linux/printk.h -index e4878bb58..ebebc32e7 100644 ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -9,6 +9,8 @@ - #include - #include + static void console_init_seq(struct console *newcon, bool bootcon_registered) +@@ -3547,6 +3548,9 @@ void register_console(struct console *newcon) + newcon->dropped = 0; + console_init_seq(newcon, bootcon_registered); -+struct uart_port; ++ if (newcon->flags & CON_NBCON) ++ nbcon_init(newcon); + - extern const char linux_banner[]; - extern const char linux_proc_banner[]; - -@@ -159,13 +161,16 @@ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. +@@ -3638,6 +3642,9 @@ static int unregister_console_locked(struct console *console) + */ + synchronize_srcu(&console_srcu); - extern void __printk_safe_enter(void); - extern void __printk_safe_exit(void); -+extern void __printk_deferred_enter(void); -+extern void __printk_deferred_exit(void); ++ if (console->flags & CON_NBCON) ++ nbcon_cleanup(console); + - /* - * The printk_deferred_enter/exit macros are available only as a hack for - * some code paths that need to defer all printk console printing. Interrupts - * must be disabled for the deferred duration. + console_sysfs_notify(); + + if (console->exit) +-- +2.51.0 + +From 79d685ef217b9cf8c9e0dacf2b2f013cd5d8c063 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:01 +0206 +Subject: [PATCH 101/213] printk: nbcon: Add acquire/release logic + +Add per console acquire/release functionality. + +The state of the console is maintained in the "nbcon_state" atomic +variable. + +The console is locked when: + + - The 'prio' field contains the priority of the context that owns the + console. Only higher priority contexts are allowed to take over the + lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. + + - The 'cpu' field denotes on which CPU the console is locked. It is used + to prevent busy waiting on the same CPU. Also it informs the lock owner + that it has lost the lock in a more complex scenario when the lock was + taken over by a higher priority context, released, and taken on another + CPU with the same priority as the interrupted owner. + +The acquire mechanism uses a few more fields: + + - The 'req_prio' field is used by the handover approach to make the + current owner aware that there is a context with a higher priority + waiting for the friendly handover. + + - The 'unsafe' field allows to take over the console in a safe way in the + middle of emitting a message. The field is set only when accessing some + shared resources or when the console device is manipulated. It can be + cleared, for example, after emitting one character when the console + device is in a consistent state. + + - The 'unsafe_takeover' field is set when a hostile takeover took the + console in an unsafe state. The console will stay in the unsafe state + until re-initialized. + +The acquire mechanism uses three approaches: + + 1) Direct acquire when the console is not owned or is owned by a lower + priority context and is in a safe state. + + 2) Friendly handover mechanism uses a request/grant handshake. It is used + when the current owner has lower priority and the console is in an + unsafe state. 
+ + The requesting context: + + a) Sets its priority into the 'req_prio' field. + + b) Waits (with a timeout) for the owning context to unlock the + console. + + c) Takes the lock and clears the 'req_prio' field. + + The owning context: + + a) Observes the 'req_prio' field set on exit from the unsafe + console state. + + b) Gives up console ownership by clearing the 'prio' field. + + 3) Unsafe hostile takeover allows to take over the lock even when the + console is an unsafe state. It is used only in panic() by the final + attempt to flush consoles in a try and hope mode. + + Note that separate record buffers are used in panic(). As a result, + the messages can be read and formatted without any risk even after + using the hostile takeover in unsafe state. + +The release function simply clears the 'prio' field. + +All operations on @console::nbcon_state are atomic cmpxchg based to +handle concurrency. + +The acquire/release functions implement only minimal policies: + + - Preference for higher priority contexts. + - Protection of the panic CPU. + +All other policy decisions must be made at the call sites: + + - What is marked as an unsafe section. + - Whether to spin-wait if there is already an owner and the console is + in an unsafe state. + - Whether to attempt an unsafe hostile takeover. + +The design allows to implement the well known: + + acquire() + output_one_printk_record() + release() + +The output of one printk record might be interrupted with a higher priority +context. The new owner is supposed to reprint the entire interrupted record +from scratch. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-3-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 56 +++++ + kernel/printk/nbcon.c | 497 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 553 insertions(+) + +diff --git a/include/linux/console.h b/include/linux/console.h +index bd962f178c8d..fe50cf14c0e3 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -180,13 +180,29 @@ enum cons_flags { + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * ++ * @req_prio: The priority of a handover request ++ * @prio: The priority of the current owner ++ * @unsafe: Console is busy in a non takeover region ++ * @unsafe_takeover: A hostile takeover in an unsafe state happened in the ++ * past. The console cannot be safe until re-initialized. ++ * @cpu: The CPU on which the owner runs ++ * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. ++ * ++ * The @prio and @req_prio fields are particularly important to allow ++ * spin-waiting to timeout and give up without the risk of a waiter being ++ * assigned the lock after giving up. 
*/ --#define printk_deferred_enter __printk_safe_enter --#define printk_deferred_exit __printk_safe_exit -+#define printk_deferred_enter() __printk_deferred_enter() -+#define printk_deferred_exit() __printk_deferred_exit() + struct nbcon_state { + union { + unsigned int atom; + struct { ++ unsigned int prio : 2; ++ unsigned int req_prio : 2; ++ unsigned int unsafe : 1; ++ unsigned int unsafe_takeover : 1; ++ unsigned int cpu : 24; + }; + }; + }; +@@ -199,6 +215,46 @@ struct nbcon_state { + */ + static_assert(sizeof(struct nbcon_state) <= sizeof(int)); - /* - * Please don't use printk_ratelimit(), because it shares ratelimiting state -@@ -192,6 +197,10 @@ void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; - extern asmlinkage void dump_stack(void) __cold; - void printk_trigger_flush(void); -+void printk_legacy_allow_panic_sync(void); -+extern void nbcon_acquire(struct uart_port *up); -+extern void nbcon_release(struct uart_port *up); -+void nbcon_atomic_flush_unsafe(void); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) -@@ -271,6 +280,23 @@ static inline void dump_stack(void) - static inline void printk_trigger_flush(void) - { - } ++/** ++ * nbcon_prio - console owner priority for nbcon consoles ++ * @NBCON_PRIO_NONE: Unused ++ * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage ++ * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) ++ * @NBCON_PRIO_PANIC: Panic output ++ * @NBCON_PRIO_MAX: The number of priority levels ++ * ++ * A higher priority context can takeover the console when it is ++ * in the safe state. The final attempt to flush consoles in panic() ++ * can be allowed to do so even in an unsafe state (Hope and pray). ++ */ ++enum nbcon_prio { ++ NBCON_PRIO_NONE = 0, ++ NBCON_PRIO_NORMAL, ++ NBCON_PRIO_EMERGENCY, ++ NBCON_PRIO_PANIC, ++ NBCON_PRIO_MAX, ++}; + -+static inline void printk_legacy_allow_panic_sync(void) -+{ -+} ++struct console; + -+static inline void nbcon_acquire(struct uart_port *up) ++/** ++ * struct nbcon_context - Context for console acquire/release ++ * @console: The associated console ++ * @spinwait_max_us: Limit for spin-wait acquire ++ * @prio: Priority of the context ++ * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can ++ * be used only with NBCON_PRIO_PANIC @prio. It ++ * might cause a system freeze when the console ++ * is used later. ++ */ ++struct nbcon_context { ++ /* members set by caller */ ++ struct console *console; ++ unsigned int spinwait_max_us; ++ enum nbcon_prio prio; ++ unsigned int allow_unsafe_takeover : 1; ++}; ++ + /** + * struct console - The console descriptor structure + * @name: The name of the console driver +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 63d24ca62ac5..a2a354f859f9 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -4,10 +4,98 @@ + + #include + #include ++#include + #include "internal.h" + /* + * Printk console printing implementation for consoles which does not depend + * on the legacy style console_lock mechanism. ++ * ++ * The state of the console is maintained in the "nbcon_state" atomic ++ * variable. ++ * ++ * The console is locked when: ++ * ++ * - The 'prio' field contains the priority of the context that owns the ++ * console. Only higher priority contexts are allowed to take over the ++ * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. ++ * ++ * - The 'cpu' field denotes on which CPU the console is locked. 
It is used ++ * to prevent busy waiting on the same CPU. Also it informs the lock owner ++ * that it has lost the lock in a more complex scenario when the lock was ++ * taken over by a higher priority context, released, and taken on another ++ * CPU with the same priority as the interrupted owner. ++ * ++ * The acquire mechanism uses a few more fields: ++ * ++ * - The 'req_prio' field is used by the handover approach to make the ++ * current owner aware that there is a context with a higher priority ++ * waiting for the friendly handover. ++ * ++ * - The 'unsafe' field allows to take over the console in a safe way in the ++ * middle of emitting a message. The field is set only when accessing some ++ * shared resources or when the console device is manipulated. It can be ++ * cleared, for example, after emitting one character when the console ++ * device is in a consistent state. ++ * ++ * - The 'unsafe_takeover' field is set when a hostile takeover took the ++ * console in an unsafe state. The console will stay in the unsafe state ++ * until re-initialized. ++ * ++ * The acquire mechanism uses three approaches: ++ * ++ * 1) Direct acquire when the console is not owned or is owned by a lower ++ * priority context and is in a safe state. ++ * ++ * 2) Friendly handover mechanism uses a request/grant handshake. It is used ++ * when the current owner has lower priority and the console is in an ++ * unsafe state. ++ * ++ * The requesting context: ++ * ++ * a) Sets its priority into the 'req_prio' field. ++ * ++ * b) Waits (with a timeout) for the owning context to unlock the ++ * console. ++ * ++ * c) Takes the lock and clears the 'req_prio' field. ++ * ++ * The owning context: ++ * ++ * a) Observes the 'req_prio' field set on exit from the unsafe ++ * console state. ++ * ++ * b) Gives up console ownership by clearing the 'prio' field. ++ * ++ * 3) Unsafe hostile takeover allows to take over the lock even when the ++ * console is an unsafe state. It is used only in panic() by the final ++ * attempt to flush consoles in a try and hope mode. ++ * ++ * The release function simply clears the 'prio' field. ++ * ++ * All operations on @console::nbcon_state are atomic cmpxchg based to ++ * handle concurrency. ++ * ++ * The acquire/release functions implement only minimal policies: ++ * ++ * - Preference for higher priority contexts. ++ * - Protection of the panic CPU. ++ * ++ * All other policy decisions must be made at the call sites: ++ * ++ * - What is marked as an unsafe section. ++ * - Whether to spin-wait if there is already an owner and the console is ++ * in an unsafe state. ++ * - Whether to attempt an unsafe hostile takeover. ++ * ++ * The design allows to implement the well known: ++ * ++ * acquire() ++ * output_one_printk_record() ++ * release() ++ * ++ * The output of one printk record might be interrupted with a higher priority ++ * context. The new owner is supposed to reprint the entire interrupted record ++ * from scratch. + */ + + /** +@@ -47,6 +135,415 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta + return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); + } + ++/** ++ * nbcon_context_try_acquire_direct - Try to acquire directly ++ * @ctxt: The context of the caller ++ * @cur: The current console state ++ * ++ * Acquire the console when it is released. Also acquire the console when ++ * the current owner has a lower priority and the console is in a safe state. ++ * ++ * Return: 0 on success. 
Otherwise, an error code on failure. Also @cur ++ * is updated to the latest state when failed to modify it. ++ * ++ * Errors: ++ * ++ * -EPERM: A panic is in progress and this is not the panic CPU. ++ * Or the current owner or waiter has the same or higher ++ * priority. No acquire method can be successful in ++ * this case. ++ * ++ * -EBUSY: The current owner has a lower priority but the console ++ * in an unsafe state. The caller should try using ++ * the handover acquire method. ++ */ ++static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, ++ struct nbcon_state *cur) +{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct nbcon_state new; ++ ++ do { ++ if (other_cpu_in_panic()) ++ return -EPERM; ++ ++ if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) ++ return -EPERM; ++ ++ if (cur->unsafe) ++ return -EBUSY; ++ ++ /* ++ * The console should never be safe for a direct acquire ++ * if an unsafe hostile takeover has ever happened. ++ */ ++ WARN_ON_ONCE(cur->unsafe_takeover); ++ ++ new.atom = cur->atom; ++ new.prio = ctxt->prio; ++ new.req_prio = NBCON_PRIO_NONE; ++ new.unsafe = cur->unsafe_takeover; ++ new.cpu = cpu; ++ ++ } while (!nbcon_state_try_cmpxchg(con, cur, &new)); ++ ++ return 0; +} + -+static inline void nbcon_release(struct uart_port *up) ++static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) +{ ++ /* ++ * The request context is well defined by the @req_prio because: ++ * ++ * - Only a context with a higher priority can take over the request. ++ * - There are only three priorities. ++ * - Only one CPU is allowed to request PANIC priority. ++ * - Lower priorities are ignored during panic() until reboot. ++ * ++ * As a result, the following scenario is *not* possible: ++ * ++ * 1. Another context with a higher priority directly takes ownership. ++ * 2. The higher priority context releases the ownership. ++ * 3. A lower priority context takes the ownership. ++ * 4. Another context with the same priority as this context ++ * creates a request and starts waiting. ++ */ ++ ++ return (cur->req_prio == expected_prio); +} + -+static inline void nbcon_atomic_flush_unsafe(void) ++/** ++ * nbcon_context_try_acquire_requested - Try to acquire after having ++ * requested a handover ++ * @ctxt: The context of the caller ++ * @cur: The current console state ++ * ++ * This is a helper function for nbcon_context_try_acquire_handover(). ++ * It is called when the console is in an unsafe state. The current ++ * owner will release the console on exit from the unsafe region. ++ * ++ * Return: 0 on success and @cur is updated to the new console state. ++ * Otherwise an error code on failure. ++ * ++ * Errors: ++ * ++ * -EPERM: A panic is in progress and this is not the panic CPU ++ * or this context is no longer the waiter. ++ * ++ * -EBUSY: The console is still locked. The caller should ++ * continue waiting. ++ * ++ * Note: The caller must still remove the request when an error has occurred ++ * except when this context is no longer the waiter. 
++ */ ++static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, ++ struct nbcon_state *cur) +{ -+} ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct nbcon_state new; + - #endif - - #ifdef CONFIG_SMP -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 393c30034..0879ca36a 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -917,6 +917,9 @@ struct task_struct { - * ->sched_remote_wakeup gets used, so it can be in this word. - */ - unsigned sched_remote_wakeup:1; -+#ifdef CONFIG_RT_MUTEXES -+ unsigned sched_rt_mutex:1; -+#endif - - /* Bit to tell LSMs we're in execve(): */ - unsigned in_execve:1; -@@ -1909,6 +1912,7 @@ static inline int dl_task_check_affinity(struct task_struct *p, const struct cpu - } - #endif - -+extern bool task_is_pi_boosted(const struct task_struct *p); - extern int yield_to(struct task_struct *p, bool preempt); - extern void set_user_nice(struct task_struct *p, long nice); - extern int task_prio(const struct task_struct *p); -@@ -2053,17 +2057,17 @@ static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, - update_ti_thread_flag(task_thread_info(tsk), flag, value); - } - --static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) -+static inline bool test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) - { - return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); - } - --static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) -+static inline bool test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) - { - return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); - } ++ /* Note that the caller must still remove the request! */ ++ if (other_cpu_in_panic()) ++ return -EPERM; ++ ++ /* ++ * Note that the waiter will also change if there was an unsafe ++ * hostile takeover. ++ */ ++ if (!nbcon_waiter_matches(cur, ctxt->prio)) ++ return -EPERM; ++ ++ /* If still locked, caller should continue waiting. */ ++ if (cur->prio != NBCON_PRIO_NONE) ++ return -EBUSY; ++ ++ /* ++ * The previous owner should have never released ownership ++ * in an unsafe region. ++ */ ++ WARN_ON_ONCE(cur->unsafe); ++ ++ new.atom = cur->atom; ++ new.prio = ctxt->prio; ++ new.req_prio = NBCON_PRIO_NONE; ++ new.unsafe = cur->unsafe_takeover; ++ new.cpu = cpu; ++ ++ if (!nbcon_state_try_cmpxchg(con, cur, &new)) { ++ /* ++ * The acquire could fail only when it has been taken ++ * over by a higher priority context. ++ */ ++ WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); ++ return -EPERM; ++ } ++ ++ /* Handover success. This context now owns the console. */ ++ return 0; ++} ++ ++/** ++ * nbcon_context_try_acquire_handover - Try to acquire via handover ++ * @ctxt: The context of the caller ++ * @cur: The current console state ++ * ++ * The function must be called only when the context has higher priority ++ * than the current owner and the console is in an unsafe state. ++ * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. ++ * ++ * The function sets "req_prio" field to make the current owner aware of ++ * the request. Then it waits until the current owner releases the console, ++ * or an even higher context takes over the request, or timeout expires. ++ * ++ * The current owner checks the "req_prio" field on exit from the unsafe ++ * region and releases the console. It does not touch the "req_prio" field ++ * so that the console stays reserved for the waiter. 
++ * ++ * Return: 0 on success. Otherwise, an error code on failure. Also @cur ++ * is updated to the latest state when failed to modify it. ++ * ++ * Errors: ++ * ++ * -EPERM: A panic is in progress and this is not the panic CPU. ++ * Or a higher priority context has taken over the ++ * console or the handover request. ++ * ++ * -EBUSY: The current owner is on the same CPU so that the hand ++ * shake could not work. Or the current owner is not ++ * willing to wait (zero timeout). Or the console does ++ * not enter the safe state before timeout passed. The ++ * caller might still use the unsafe hostile takeover ++ * when allowed. ++ * ++ * -EAGAIN: @cur has changed when creating the handover request. ++ * The caller should retry with direct acquire. ++ */ ++static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, ++ struct nbcon_state *cur) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct nbcon_state new; ++ int timeout; ++ int request_err = -EBUSY; ++ ++ /* ++ * Check that the handover is called when the direct acquire failed ++ * with -EBUSY. ++ */ ++ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); ++ WARN_ON_ONCE(!cur->unsafe); ++ ++ /* Handover is not possible on the same CPU. */ ++ if (cur->cpu == cpu) ++ return -EBUSY; ++ ++ /* ++ * Console stays unsafe after an unsafe takeover until re-initialized. ++ * Waiting is not going to help in this case. ++ */ ++ if (cur->unsafe_takeover) ++ return -EBUSY; ++ ++ /* Is the caller willing to wait? */ ++ if (ctxt->spinwait_max_us == 0) ++ return -EBUSY; ++ ++ /* ++ * Setup a request for the handover. The caller should try to acquire ++ * the console directly when the current state has been modified. ++ */ ++ new.atom = cur->atom; ++ new.req_prio = ctxt->prio; ++ if (!nbcon_state_try_cmpxchg(con, cur, &new)) ++ return -EAGAIN; ++ ++ cur->atom = new.atom; ++ ++ /* Wait until there is no owner and then acquire the console. */ ++ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { ++ /* On successful acquire, this request is cleared. */ ++ request_err = nbcon_context_try_acquire_requested(ctxt, cur); ++ if (!request_err) ++ return 0; ++ ++ /* ++ * If the acquire should be aborted, it must be ensured ++ * that the request is removed before returning to caller. ++ */ ++ if (request_err == -EPERM) ++ break; ++ ++ udelay(1); ++ ++ /* Re-read the state because some time has passed. */ ++ nbcon_state_read(con, cur); ++ } ++ ++ /* Timed out or aborted. Carefully remove handover request. */ ++ do { ++ /* ++ * No need to remove request if there is a new waiter. This ++ * can only happen if a higher priority context has taken over ++ * the console or the handover request. ++ */ ++ if (!nbcon_waiter_matches(cur, ctxt->prio)) ++ return -EPERM; ++ ++ /* Unset request for handover. */ ++ new.atom = cur->atom; ++ new.req_prio = NBCON_PRIO_NONE; ++ if (nbcon_state_try_cmpxchg(con, cur, &new)) { ++ /* ++ * Request successfully unset. Report failure of ++ * acquiring via handover. ++ */ ++ cur->atom = new.atom; ++ return request_err; ++ } ++ ++ /* ++ * Unable to remove request. Try to acquire in case ++ * the owner has released the lock. ++ */ ++ } while (nbcon_context_try_acquire_requested(ctxt, cur)); ++ ++ /* Lucky timing. The acquire succeeded while removing the request. 
*/ ++ return 0; ++} ++ ++/** ++ * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover ++ * @ctxt: The context of the caller ++ * @cur: The current console state ++ * ++ * Acquire the console even in the unsafe state. ++ * ++ * It can be permitted by setting the 'allow_unsafe_takeover' field only ++ * by the final attempt to flush messages in panic(). ++ * ++ * Return: 0 on success. -EPERM when not allowed by the context. ++ */ ++static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, ++ struct nbcon_state *cur) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct nbcon_state new; ++ ++ if (!ctxt->allow_unsafe_takeover) ++ return -EPERM; ++ ++ /* Ensure caller is allowed to perform unsafe hostile takeovers. */ ++ if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) ++ return -EPERM; ++ ++ /* ++ * Check that try_acquire_direct() and try_acquire_handover() returned ++ * -EBUSY in the right situation. ++ */ ++ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); ++ WARN_ON_ONCE(cur->unsafe != true); ++ ++ do { ++ new.atom = cur->atom; ++ new.cpu = cpu; ++ new.prio = ctxt->prio; ++ new.unsafe |= cur->unsafe_takeover; ++ new.unsafe_takeover |= cur->unsafe; ++ ++ } while (!nbcon_state_try_cmpxchg(con, cur, &new)); ++ ++ return 0; ++} ++ ++/** ++ * nbcon_context_try_acquire - Try to acquire nbcon console ++ * @ctxt: The context of the caller ++ * ++ * Return: True if the console was acquired. False otherwise. ++ * ++ * If the caller allowed an unsafe hostile takeover, on success the ++ * caller should check the current console state to see if it is ++ * in an unsafe state. Otherwise, on success the caller may assume ++ * the console is not in an unsafe state. ++ */ ++__maybe_unused ++static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct nbcon_state cur; ++ int err; ++ ++ nbcon_state_read(con, &cur); ++try_again: ++ err = nbcon_context_try_acquire_direct(ctxt, &cur); ++ if (err != -EBUSY) ++ goto out; ++ ++ err = nbcon_context_try_acquire_handover(ctxt, &cur); ++ if (err == -EAGAIN) ++ goto try_again; ++ if (err != -EBUSY) ++ goto out; ++ ++ err = nbcon_context_try_acquire_hostile(ctxt, &cur); ++out: ++ return !err; ++} ++ ++static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, ++ int expected_prio) ++{ ++ /* ++ * Since consoles can only be acquired by higher priorities, ++ * owning contexts are uniquely identified by @prio. However, ++ * since contexts can unexpectedly lose ownership, it is ++ * possible that later another owner appears with the same ++ * priority. For this reason @cpu is also needed. ++ */ ++ ++ if (cur->prio != expected_prio) ++ return false; ++ ++ if (cur->cpu != expected_cpu) ++ return false; ++ ++ return true; ++} ++ ++/** ++ * nbcon_context_release - Release the console ++ * @ctxt: The nbcon context from nbcon_context_try_acquire() ++ */ ++__maybe_unused ++static void nbcon_context_release(struct nbcon_context *ctxt) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct nbcon_state cur; ++ struct nbcon_state new; ++ ++ nbcon_state_read(con, &cur); ++ ++ do { ++ if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) ++ return; ++ ++ new.atom = cur.atom; ++ new.prio = NBCON_PRIO_NONE; ++ ++ /* ++ * If @unsafe_takeover is set, it is kept set so that ++ * the state remains permanently unsafe. 
++ */ ++ new.unsafe |= cur.unsafe_takeover; ++ ++ } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); ++} ++ + /** + * nbcon_init - Initialize the nbcon console specific data + * @con: Console to initialize +-- +2.51.0 + +From 9a581e31d2bb86deffb1bccffc35aa84b933c56c Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Sat, 16 Sep 2023 21:26:02 +0206 +Subject: [PATCH 102/213] printk: Make static printk buffers available to nbcon + +The nbcon boot consoles also need printk buffers that are available +very early. Since the nbcon boot consoles will also be serialized +by the console_lock, they can use the same static printk buffers +that the legacy consoles are using. + +Make the legacy static printk buffers available outside of printk.c +so they can be used by nbcon.c. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-4-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 2 ++ + kernel/printk/printk.c | 13 +++++++++---- + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 2ca0ab78802c..7199d60bfc25 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -86,6 +86,8 @@ static inline void nbcon_cleanup(struct console *con) { } --static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) -+static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag) - { - return test_ti_thread_flag(task_thread_info(tsk), flag); - } -@@ -2076,9 +2080,11 @@ static inline void set_tsk_need_resched(struct task_struct *tsk) - static inline void clear_tsk_need_resched(struct task_struct *tsk) - { - clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); -+ if (IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO)) -+ clear_tsk_thread_flag(tsk, TIF_NEED_RESCHED_LAZY); - } + #endif /* CONFIG_PRINTK */ --static inline int test_tsk_need_resched(struct task_struct *tsk) -+static inline bool test_tsk_need_resched(struct task_struct *tsk) - { - return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); ++extern struct printk_buffers printk_shared_pbufs; ++ + /** + * struct printk_buffers - Buffers to read/format/output printk messages. + * @outbuf: After formatting, contains text to output. +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 0bcffaac59c2..239b8ea943e8 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2880,6 +2880,13 @@ static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + return true; } -@@ -2259,7 +2265,7 @@ static inline int rwlock_needbreak(rwlock_t *lock) - static __always_inline bool need_resched(void) ++/* ++ * Used as the printk buffers for non-panic, serialized console printing. ++ * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. ++ * Its usage requires the console_lock held. ++ */ ++struct printk_buffers printk_shared_pbufs; ++ + /* + * Print one record for the given console. The record printed is whatever + * record is the next available record for the given console. 
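The printk.c hunks here only hoist console_emit_next_record()'s function-local static buffer to the file-scope printk_shared_pbufs so that nbcon.c can reuse it; correctness still rests entirely on every user holding the console_lock. A small stand-alone C sketch of that invariant follows, with a pthread mutex standing in for console_lock; names and the buffer size are illustrative only.

    /* Illustrative model only -- not kernel code. */
    #include <pthread.h>
    #include <stdio.h>

    struct printk_buffers {
            char outbuf[2048];
    };

    /*
     * Shared by every emitter. Promoting it from a function-local static
     * to a file-scope object is what lets a second file reuse it.
     */
    static struct printk_buffers shared_pbufs;
    static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

    static void emit_next_record(const char *con_name, const char *msg)
    {
            /* The lock serializes formatting and output end to end. */
            pthread_mutex_lock(&console_lock);
            snprintf(shared_pbufs.outbuf, sizeof(shared_pbufs.outbuf),
                     "[%s] %s\n", con_name, msg);
            fputs(shared_pbufs.outbuf, stdout);
            pthread_mutex_unlock(&console_lock);
    }

    int main(void)
    {
            emit_next_record("ttyS0", "a legacy console record");
            emit_next_record("earlycon", "a boot console record");
            return 0;
    }

One buffer suffices no matter how many consoles exist, because the lock serializes the format-then-write sequence as a whole; the panic path gets its own static buffer in the next patch of the series precisely so that a hostile takeover never leaves the previous owner scribbling over the panic context's output.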
+@@ -2897,12 +2904,10 @@ static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + */ + static bool console_emit_next_record(struct console *con, bool *handover, int cookie) { -- return unlikely(tif_need_resched()); -+ return unlikely(tif_need_resched_lazy() || tif_need_resched()); - } +- static struct printk_buffers pbufs; +- + bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; +- char *outbuf = &pbufs.outbuf[0]; ++ char *outbuf = &printk_shared_pbufs.outbuf[0]; + struct printk_message pmsg = { +- .pbufs = &pbufs, ++ .pbufs = &printk_shared_pbufs, + }; + unsigned long flags; - /* -diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h -index 478084f91..719416fe8 100644 ---- a/include/linux/sched/idle.h -+++ b/include/linux/sched/idle.h -@@ -63,7 +63,7 @@ static __always_inline bool __must_check current_set_polling_and_test(void) - */ - smp_mb__after_atomic(); +-- +2.51.0 + +From 979e0c0bdb560f829f031dbd616451e7fec9c24e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:03 +0206 +Subject: [PATCH 103/213] printk: nbcon: Add buffer management + +In case of hostile takeovers it must be ensured that the previous +owner cannot scribble over the output buffer of the emergency/panic +context. This is achieved by: + + - Adding a global output buffer instance for the panic context. + This is the only situation where hostile takeovers can occur and + there is always at most 1 panic context. + + - Allocating an output buffer per non-boot console upon console + registration. This buffer is used by the console owner when not + in panic context. (For boot consoles, the existing shared global + legacy output buffer is used instead. Boot console printing will + be synchronized with legacy console printing.) + + - Choosing the appropriate buffer is handled in the acquire/release + functions. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-5-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 7 ++++ + kernel/printk/internal.h | 12 +++++-- + kernel/printk/nbcon.c | 73 +++++++++++++++++++++++++++++++++++++--- + kernel/printk/printk.c | 22 +++++++----- + 4 files changed, 99 insertions(+), 15 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index fe50cf14c0e3..867eb2534950 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -236,6 +236,7 @@ enum nbcon_prio { + }; -- return unlikely(tif_need_resched()); -+ return unlikely(need_resched()); - } + struct console; ++struct printk_buffers; - static __always_inline bool __must_check current_clr_polling_and_test(void) -@@ -76,7 +76,7 @@ static __always_inline bool __must_check current_clr_polling_and_test(void) - */ - smp_mb__after_atomic(); + /** + * struct nbcon_context - Context for console acquire/release +@@ -246,6 +247,7 @@ struct console; + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. 
++ * @pbufs: Pointer to the text buffer for this context + */ + struct nbcon_context { + /* members set by caller */ +@@ -253,6 +255,9 @@ struct nbcon_context { + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; ++ ++ /* members set by acquire */ ++ struct printk_buffers *pbufs; + }; -- return unlikely(tif_need_resched()); -+ return unlikely(need_resched()); - } + /** +@@ -276,6 +281,7 @@ struct nbcon_context { + * @node: hlist node for the console list + * + * @nbcon_state: State for nbcon consoles ++ * @pbufs: Pointer to nbcon private buffer + */ + struct console { + char name[16]; +@@ -298,6 +304,7 @@ struct console { - #else -@@ -85,11 +85,11 @@ static inline void __current_clr_polling(void) { } + /* nbcon console specific members */ + atomic_t __private nbcon_state; ++ struct printk_buffers *pbufs; + }; - static inline bool __must_check current_set_polling_and_test(void) - { -- return unlikely(tif_need_resched()); -+ return unlikely(need_resched()); - } - static inline bool __must_check current_clr_polling_and_test(void) - { -- return unlikely(tif_need_resched()); -+ return unlikely(need_resched()); - } + #ifdef CONFIG_LOCKDEP +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 7199d60bfc25..f6161cd75d7d 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -13,6 +13,12 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + #define printk_sysctl_init() do { } while (0) #endif -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index 994c25640..b2b9e6eb9 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - } - - #ifdef CONFIG_RT_MUTEXES -+extern void rt_mutex_pre_schedule(void); -+extern void rt_mutex_schedule(void); -+extern void rt_mutex_post_schedule(void); ++#define con_printk(lvl, con, fmt, ...) \ ++ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ ++ (con->flags & CON_NBCON) ? "" : "legacy ", \ ++ (con->flags & CON_BOOT) ? "boot" : "", \ ++ con->name, con->index, ##__VA_ARGS__) + - /* - * Must hold either p->pi_lock or task_rq(p)->lock. 
- */ -diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h -index be65de65f..ec46e3b49 100644 ---- a/include/linux/serial_8250.h -+++ b/include/linux/serial_8250.h -@@ -153,6 +153,8 @@ struct uart_8250_port { - #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA - unsigned char msr_saved_flags; + #ifdef CONFIG_PRINTK -+ bool console_newline_needed; -+ - struct uart_8250_dma *dma; - const struct uart_8250_ops *ops; + #ifdef CONFIG_PRINTK_CALLER +@@ -63,8 +69,9 @@ void defer_console_output(void); + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); -@@ -204,6 +206,10 @@ void serial8250_init_port(struct uart_8250_port *up); - void serial8250_set_defaults(struct uart_8250_port *up); - void serial8250_console_write(struct uart_8250_port *up, const char *s, - unsigned int count); -+bool serial8250_console_write_atomic(struct uart_8250_port *up, -+ struct nbcon_write_context *wctxt); -+bool serial8250_console_write_thread(struct uart_8250_port *up, -+ struct nbcon_write_context *wctxt); - int serial8250_console_setup(struct uart_port *port, char *options, bool probe); - int serial8250_console_exit(struct uart_port *port); ++bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); +-void nbcon_cleanup(struct console *con); ++void nbcon_free(struct console *con); -diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h -index 052df85df..6de3d7aab 100644 ---- a/include/linux/serial_core.h -+++ b/include/linux/serial_core.h -@@ -489,6 +489,7 @@ struct uart_port { - struct uart_icount icount; /* statistics */ + #else - struct console *cons; /* struct console, if any */ -+ bool nbcon_locked_port; /* True, if the port is locked by nbcon */ - /* flags must be updated while holding port mutex */ - upf_t flags; +@@ -81,8 +88,9 @@ void nbcon_cleanup(struct console *con); + #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) -@@ -596,6 +597,7 @@ struct uart_port { - static inline void uart_port_lock(struct uart_port *up) - { - spin_lock(&up->lock); -+ nbcon_acquire(up); + static inline bool printk_percpu_data_ready(void) { return false; } ++static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } +-static inline void nbcon_cleanup(struct console *con) { } ++static inline void nbcon_free(struct console *con) { } + + #endif /* CONFIG_PRINTK */ + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index a2a354f859f9..ba1febf15db6 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include "internal.h" + /* + * Printk console printing implementation for consoles which does not depend +@@ -70,6 +71,10 @@ + * console is an unsafe state. It is used only in panic() by the final + * attempt to flush consoles in a try and hope mode. + * ++ * Note that separate record buffers are used in panic(). As a result, ++ * the messages can be read and formatted without any risk even after ++ * using the hostile takeover in unsafe state. ++ * + * The release function simply clears the 'prio' field. 
+ * + * All operations on @console::nbcon_state are atomic cmpxchg based to +@@ -459,6 +464,8 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, + return 0; } ++static struct printk_buffers panic_nbcon_pbufs; ++ /** -@@ -605,6 +607,7 @@ static inline void uart_port_lock(struct uart_port *up) - static inline void uart_port_lock_irq(struct uart_port *up) + * nbcon_context_try_acquire - Try to acquire nbcon console + * @ctxt: The context of the caller +@@ -473,6 +480,7 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, + __maybe_unused + static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { - spin_lock_irq(&up->lock); -+ nbcon_acquire(up); - } ++ unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state cur; + int err; +@@ -491,7 +499,18 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) - /** -@@ -615,6 +618,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) - static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) - { - spin_lock_irqsave(&up->lock, *flags); -+ nbcon_acquire(up); + err = nbcon_context_try_acquire_hostile(ctxt, &cur); + out: +- return !err; ++ if (err) ++ return false; ++ ++ /* Acquire succeeded. */ ++ ++ /* Assign the appropriate buffer for this context. */ ++ if (atomic_read(&panic_cpu) == cpu) ++ ctxt->pbufs = &panic_nbcon_pbufs; ++ else ++ ctxt->pbufs = con->pbufs; ++ ++ return true; } - /** -@@ -625,7 +629,11 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f - */ - static inline bool uart_port_trylock(struct uart_port *up) - { -- return spin_trylock(&up->lock); -+ if (!spin_trylock(&up->lock)) -+ return false; + static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, +@@ -530,7 +549,7 @@ static void nbcon_context_release(struct nbcon_context *ctxt) + + do { + if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) +- return; ++ break; + + new.atom = cur.atom; + new.prio = NBCON_PRIO_NONE; +@@ -542,26 +561,70 @@ static void nbcon_context_release(struct nbcon_context *ctxt) + new.unsafe |= cur.unsafe_takeover; + + } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); ++ ++ ctxt->pbufs = NULL; ++} ++ ++/** ++ * nbcon_alloc - Allocate buffers needed by the nbcon console ++ * @con: Console to allocate buffers for ++ * ++ * Return: True on success. False otherwise and the console cannot ++ * be used. ++ * ++ * This is not part of nbcon_init() because buffer allocation must ++ * be performed earlier in the console registration process. ++ */ ++bool nbcon_alloc(struct console *con) ++{ ++ if (con->flags & CON_BOOT) { ++ /* ++ * Boot console printing is synchronized with legacy console ++ * printing, so boot consoles can share the same global printk ++ * buffers. ++ */ ++ con->pbufs = &printk_shared_pbufs; ++ } else { ++ con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); ++ if (!con->pbufs) { ++ con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); ++ return false; ++ } ++ } + -+ nbcon_acquire(up); + return true; } /** -@@ -637,7 +645,11 @@ static inline bool uart_port_trylock(struct uart_port *up) + * nbcon_init - Initialize the nbcon console specific data + * @con: Console to initialize ++ * ++ * nbcon_alloc() *must* be called and succeed before this function ++ * is called. 
*/ - static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) + void nbcon_init(struct console *con) { -- return spin_trylock_irqsave(&up->lock, *flags); -+ if (!spin_trylock_irqsave(&up->lock, *flags)) -+ return false; + struct nbcon_state state = { }; + ++ /* nbcon_alloc() must have been called and successful! */ ++ BUG_ON(!con->pbufs); + -+ nbcon_acquire(up); -+ return true; + nbcon_state_set(con, &state); } /** -@@ -646,6 +658,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long +- * nbcon_cleanup - Cleanup the nbcon console specific data +- * @con: Console to cleanup ++ * nbcon_free - Free and cleanup the nbcon console specific data ++ * @con: Console to free/cleanup nbcon data */ - static inline void uart_port_unlock(struct uart_port *up) +-void nbcon_cleanup(struct console *con) ++void nbcon_free(struct console *con) { -+ nbcon_release(up); - spin_unlock(&up->lock); + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); ++ ++ /* Boot consoles share global printk buffers. */ ++ if (!(con->flags & CON_BOOT)) ++ kfree(con->pbufs); ++ ++ con->pbufs = NULL; } +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 239b8ea943e8..c716841c2ef5 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3390,12 +3390,6 @@ static void try_enable_default_console(struct console *newcon) + newcon->flags |= CON_CONSDEV; + } + +-#define con_printk(lvl, con, fmt, ...) \ +- printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ +- (con->flags & CON_NBCON) ? "" : "legacy ", \ +- (con->flags & CON_BOOT) ? "boot" : "", \ +- con->name, con->index, ##__VA_ARGS__) +- + static void console_init_seq(struct console *newcon, bool bootcon_registered) + { + struct console *con; +@@ -3509,6 +3503,15 @@ void register_console(struct console *newcon) + goto unlock; + } + ++ if (newcon->flags & CON_NBCON) { ++ /* ++ * Ensure the nbcon console buffers can be allocated ++ * before modifying any global data. ++ */ ++ if (!nbcon_alloc(newcon)) ++ goto unlock; ++ } ++ + /* + * See if we want to enable this console driver by default. + * +@@ -3536,8 +3539,11 @@ void register_console(struct console *newcon) + err = try_enable_preferred_console(newcon, false); + + /* printk() messages are not printed to the Braille console. */ +- if (err || newcon->flags & CON_BRL) ++ if (err || newcon->flags & CON_BRL) { ++ if (newcon->flags & CON_NBCON) ++ nbcon_free(newcon); + goto unlock; ++ } + + /* + * If we have a bootconsole, and are switching to a real console, +@@ -3648,7 +3654,7 @@ static int unregister_console_locked(struct console *console) + synchronize_srcu(&console_srcu); + + if (console->flags & CON_NBCON) +- nbcon_cleanup(console); ++ nbcon_free(console); + + console_sysfs_notify(); -@@ -655,6 +668,7 @@ static inline void uart_port_unlock(struct uart_port *up) +-- +2.51.0 + +From c561557f8de4d11b50360741ee04999c27a14b80 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:04 +0206 +Subject: [PATCH 104/213] printk: nbcon: Add ownership state functions + +Provide functions that are related to the safe handover mechanism +and allow console drivers to dynamically specify unsafe regions: + + - nbcon_context_can_proceed() + + Invoked by a console owner to check whether a handover request + is pending or whether the console has been taken over by another + context. If a handover request is pending, this function will + also perform the handover, thus cancelling its own ownership. 
+
+ - nbcon_context_enter_unsafe()/nbcon_context_exit_unsafe()
+
+   Invoked by a console owner to denote that the driver is about
+   to enter or leave a critical region where a takeover is unsafe.
+   The exit variant is the point where the current owner releases
+   the lock for a higher priority context which asked for the
+   friendly handover.
+
+   The unsafe state is stored in the console state and allows a
+   new context to make informed decisions about whether to attempt a
+   takeover of such a console. The unsafe state is also available
+   to the driver so that it can make informed decisions about the
+   required actions and possibly take a special emergency path.
+
+Co-developed-by: John Ogness
+Signed-off-by: John Ogness
+Signed-off-by: Thomas Gleixner (Intel)
+Reviewed-by: Petr Mladek
+Signed-off-by: Petr Mladek
+Link: https://lore.kernel.org/r/20230916192007.608398-6-john.ogness@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/printk/nbcon.c | 123 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 122 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
+index ba1febf15db6..98e4be5429f0 100644
+--- a/kernel/printk/nbcon.c
++++ b/kernel/printk/nbcon.c
+@@ -537,7 +537,6 @@ static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu,
+ * nbcon_context_release - Release the console
+ * @ctxt: The nbcon context from nbcon_context_try_acquire()
+ */
+-__maybe_unused
+ static void nbcon_context_release(struct nbcon_context *ctxt)
+ {
+ unsigned int cpu = smp_processor_id();
+@@ -565,6 +564,128 @@ static void nbcon_context_release(struct nbcon_context *ctxt)
+ ctxt->pbufs = NULL;
+ }
++/**
++ * nbcon_context_can_proceed - Check whether ownership can proceed
++ * @ctxt: The nbcon context from nbcon_context_try_acquire()
++ * @cur: The current console state
++ *
++ * Return: True if this context still owns the console. False if
++ * ownership was handed over or taken.
++ *
++ * Must be invoked when entering the unsafe state to make sure that it still
++ * owns the lock. Also must be invoked when exiting the unsafe context
++ * to eventually free the lock for a higher priority context which asked
++ * for the friendly handover.
++ *
++ * It can be called inside an unsafe section when the console is just
++ * temporarily in a safe state instead of exiting and entering the unsafe
++ * state.
++ *
++ * Also it can be called in the safe context before doing an expensive
++ * safe operation. It does not make sense to do the operation when
++ * a higher priority context took the lock.
++ *
++ * When this function returns false then the calling context no longer owns
++ * the console and is no longer allowed to go forward. In this case it must
++ * back out immediately and carefully. The buffer content is also no longer
++ * trusted since it no longer belongs to the calling context.
++ */
++static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur)
+{
++ unsigned int cpu = smp_processor_id();
++
++ /* Make sure this context still owns the console.
*/ ++ if (!nbcon_owner_matches(cur, cpu, ctxt->prio)) ++ return false; ++ ++ /* The console owner can proceed if there is no waiter. */ ++ if (cur->req_prio == NBCON_PRIO_NONE) ++ return true; ++ ++ /* ++ * A console owner within an unsafe region is always allowed to ++ * proceed, even if there are waiters. It can perform a handover ++ * when exiting the unsafe region. Otherwise the waiter will ++ * need to perform an unsafe hostile takeover. ++ */ ++ if (cur->unsafe) ++ return true; ++ ++ /* Waiters always have higher priorities than owners. */ ++ WARN_ON_ONCE(cur->req_prio <= cur->prio); ++ ++ /* ++ * Having a safe point for take over and eventually a few ++ * duplicated characters or a full line is way better than a ++ * hostile takeover. Post processing can take care of the garbage. ++ * Release and hand over. ++ */ ++ nbcon_context_release(ctxt); ++ ++ /* ++ * It is not clear whether the waiter really took over ownership. The ++ * outermost callsite must make the final decision whether console ++ * ownership is needed for it to proceed. If yes, it must reacquire ++ * ownership (possibly hostile) before carefully proceeding. ++ * ++ * The calling context no longer owns the console so go back all the ++ * way instead of trying to implement reacquire heuristics in tons of ++ * places. ++ */ ++ return false; +} + -+/* Only for use in the console->driver_enter() callback. */ -+static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) -+{ -+ spin_lock_irqsave(&up->lock, *flags); ++#define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) ++#define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) ++ ++/** ++ * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state ++ * @ctxt: The nbcon context from nbcon_context_try_acquire() ++ * @unsafe: The new value for the unsafe bit ++ * ++ * Return: True if the unsafe state was updated and this context still ++ * owns the console. Otherwise false if ownership was handed ++ * over or taken. ++ * ++ * This function allows console owners to modify the unsafe status of the ++ * console. ++ * ++ * When this function returns false then the calling context no longer owns ++ * the console and is no longer allowed to go forward. In this case it must ++ * back out immediately and carefully. The buffer content is also no longer ++ * trusted since it no longer belongs to the calling context. ++ * ++ * Internal helper to avoid duplicated code. ++ */ ++__maybe_unused ++static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) ++{ ++ struct console *con = ctxt->console; ++ struct nbcon_state cur; ++ struct nbcon_state new; ++ ++ nbcon_state_read(con, &cur); ++ ++ do { ++ /* ++ * The unsafe bit must not be cleared if an ++ * unsafe hostile takeover has occurred. ++ */ ++ if (!unsafe && cur.unsafe_takeover) ++ goto out; ++ ++ if (!nbcon_context_can_proceed(ctxt, &cur)) ++ return false; ++ ++ new.atom = cur.atom; ++ new.unsafe = unsafe; ++ } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); ++ ++ cur.atom = new.atom; ++out: ++ return nbcon_context_can_proceed(ctxt, &cur); +} + -+/* Only for use in the console->driver_exit() callback. 
*/ -+static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) - { - spin_unlock_irqrestore(&up->lock, flags); - } -@@ -1078,14 +1105,14 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) - u8 sysrq_ch; + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +-- +2.51.0 + +From 5158565998c84e868a3eec48cc0139c7c50ae5df Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:05 +0206 +Subject: [PATCH 105/213] printk: nbcon: Add sequence handling + +Add an atomic_long_t field @nbcon_seq to the console struct to +store the sequence number for nbcon consoles. For nbcon consoles +this will be used instead of the non-atomic @seq field. The new +field allows for safe atomic sequence number updates without +requiring any locking. + +On 64bit systems the new field stores the full sequence number. +On 32bit systems the new field stores the lower 32 bits of the +sequence number, which are expanded to 64bit as needed by +folding the values based on the sequence numbers available in +the ringbuffer. + +For 32bit systems, having a 32bit representation in the console +is sufficient. If a console ever gets more than 2^31 records +behind the ringbuffer then this is the least of the problems. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-7-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 4 ++ + kernel/printk/internal.h | 7 +++ + kernel/printk/nbcon.c | 101 +++++++++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 31 +++++++++--- + 4 files changed, 136 insertions(+), 7 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 867eb2534950..8a326fd9085b 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -248,6 +248,7 @@ struct printk_buffers; + * might cause a system freeze when the console + * is used later. 
+ * @pbufs: Pointer to the text buffer for this context ++ * @seq: The sequence number to print for this context + */ + struct nbcon_context { + /* members set by caller */ +@@ -258,6 +259,7 @@ struct nbcon_context { - if (!port->has_sysrq) { -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - return; - } + /* members set by acquire */ + struct printk_buffers *pbufs; ++ u64 seq; + }; - sysrq_ch = port->sysrq_ch; - port->sysrq_ch = 0; + /** +@@ -281,6 +283,7 @@ struct nbcon_context { + * @node: hlist node for the console list + * + * @nbcon_state: State for nbcon consoles ++ * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer + */ + struct console { +@@ -304,6 +307,7 @@ struct console { -- spin_unlock(&port->lock); -+ uart_port_unlock(port); + /* nbcon console specific members */ + atomic_t __private nbcon_state; ++ atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; + }; - if (sysrq_ch) - handle_sysrq(sysrq_ch); -@@ -1097,14 +1124,14 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port - u8 sysrq_ch; +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index f6161cd75d7d..6473f5ae4a18 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -4,6 +4,7 @@ + */ + #include + #include ++#include "printk_ringbuffer.h" - if (!port->has_sysrq) { -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); - return; - } + #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) + void __init printk_sysctl_init(void); +@@ -42,6 +43,8 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; - sysrq_ch = port->sysrq_ch; - port->sysrq_ch = 0; ++extern struct printk_ringbuffer *prb; ++ + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +@@ -69,6 +72,8 @@ void defer_console_output(void); + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); ++u64 nbcon_seq_read(struct console *con); ++void nbcon_seq_force(struct console *con, u64 seq); + bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); + void nbcon_free(struct console *con); +@@ -88,6 +93,8 @@ void nbcon_free(struct console *con); + #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) - if (sysrq_ch) - handle_sysrq(sysrq_ch); -@@ -1120,12 +1147,12 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) - } - static inline void uart_unlock_and_check_sysrq(struct uart_port *port) - { -- spin_unlock(&port->lock); -+ uart_port_unlock(port); - } - static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, - unsigned long flags) - { -- spin_unlock_irqrestore(&port->lock, flags); -+ uart_port_unlock_irqrestore(port, flags); + static inline bool printk_percpu_data_ready(void) { return false; } ++static inline u64 nbcon_seq_read(struct console *con) { return 0; } ++static inline void nbcon_seq_force(struct console *con, u64 seq) { } + static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 98e4be5429f0..e076096b31c0 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -140,6 +140,101 
@@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta + return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } - #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ - -diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h -index 9ea0b2806..5ded1450a 100644 ---- a/include/linux/thread_info.h -+++ b/include/linux/thread_info.h -@@ -59,6 +59,16 @@ enum syscall_work_bit { - - #include -+#ifdef CONFIG_PREEMPT_BUILD_AUTO -+# define TIF_NEED_RESCHED_LAZY TIF_ARCH_RESCHED_LAZY -+# define _TIF_NEED_RESCHED_LAZY _TIF_ARCH_RESCHED_LAZY -+# define TIF_NEED_RESCHED_LAZY_OFFSET (TIF_NEED_RESCHED_LAZY - TIF_NEED_RESCHED) -+#else -+# define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED -+# define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED -+# define TIF_NEED_RESCHED_LAZY_OFFSET 0 -+#endif ++#ifdef CONFIG_64BIT + - #ifdef __KERNEL__ - - #ifndef arch_set_restart_data -@@ -185,6 +195,13 @@ static __always_inline bool tif_need_resched(void) - (unsigned long *)(¤t_thread_info()->flags)); - } - -+static __always_inline bool tif_need_resched_lazy(void) ++#define __seq_to_nbcon_seq(seq) (seq) ++#define __nbcon_seq_to_seq(seq) (seq) ++ ++#else /* CONFIG_64BIT */ ++ ++#define __seq_to_nbcon_seq(seq) ((u32)seq) ++ ++static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) +{ -+ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && -+ arch_test_bit(TIF_NEED_RESCHED_LAZY, -+ (unsigned long *)(¤t_thread_info()->flags)); ++ u64 seq; ++ u64 rb_next_seq; ++ ++ /* ++ * The provided sequence is only the lower 32 bits of the ringbuffer ++ * sequence. It needs to be expanded to 64bit. Get the next sequence ++ * number from the ringbuffer and fold it. ++ * ++ * Having a 32bit representation in the console is sufficient. ++ * If a console ever gets more than 2^31 records behind ++ * the ringbuffer then this is the least of the problems. ++ * ++ * Also the access to the ring buffer is always safe. ++ */ ++ rb_next_seq = prb_next_seq(prb); ++ seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); ++ ++ return seq; +} + - #else - - static __always_inline bool tif_need_resched(void) -@@ -193,6 +210,13 @@ static __always_inline bool tif_need_resched(void) - (unsigned long *)(¤t_thread_info()->flags)); - } - -+static __always_inline bool tif_need_resched_lazy(void) ++#endif /* CONFIG_64BIT */ ++ ++/** ++ * nbcon_seq_read - Read the current console sequence ++ * @con: Console to read the sequence of ++ * ++ * Return: Sequence number of the next record to print on @con. 
++ */ ++u64 nbcon_seq_read(struct console *con) +{ -+ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && -+ test_bit(TIF_NEED_RESCHED_LAZY, -+ (unsigned long *)(¤t_thread_info()->flags)); ++ unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); ++ ++ return __nbcon_seq_to_seq(nbcon_seq); +} + - #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ - - #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES -diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index aa1bc4172..689c7b0ae 100644 ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -178,8 +178,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); - - enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, -- TRACE_FLAG_IRQS_NOSUPPORT = 0x02, -- TRACE_FLAG_NEED_RESCHED = 0x04, -+ TRACE_FLAG_NEED_RESCHED = 0x02, -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, -@@ -205,11 +205,11 @@ static inline unsigned int tracing_gen_ctx(void) - - static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) - { -- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -+ return tracing_gen_ctx_irq_test(0); - } - static inline unsigned int tracing_gen_ctx(void) - { -- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -+ return tracing_gen_ctx_irq_test(0); - } - #endif - -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index c2f1fd95a..0f3d4c2a4 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -11,6 +11,13 @@ config PREEMPT_BUILD - select PREEMPTION - select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK - -+config PREEMPT_BUILD_AUTO -+ bool -+ select PREEMPT_BUILD ++/** ++ * nbcon_seq_force - Force console sequence to a specific value ++ * @con: Console to work on ++ * @seq: Sequence number value to set ++ * ++ * Only to be used during init (before registration) or in extreme situations ++ * (such as panic with CONSOLE_REPLAY_ALL). ++ */ ++void nbcon_seq_force(struct console *con, u64 seq) ++{ ++ /* ++ * If the specified record no longer exists, the oldest available record ++ * is chosen. This is especially important on 32bit systems because only ++ * the lower 32 bits of the sequence number are stored. The upper 32 bits ++ * are derived from the sequence numbers available in the ringbuffer. ++ */ ++ u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); + -+config HAVE_PREEMPT_AUTO -+ bool ++ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); + - choice - prompt "Preemption Model" - default PREEMPT_NONE -@@ -67,9 +74,17 @@ config PREEMPT - embedded system with latency requirements in the milliseconds - range. ++ /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ ++ con->seq = 0; ++} ++ ++/** ++ * nbcon_seq_try_update - Try to update the console sequence number ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * @new_seq: The new sequence number to set ++ * ++ * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to ++ * the 64bit value). This could be a different value than @new_seq if ++ * nbcon_seq_force() was used or the current context no longer owns the ++ * console. In the later case, it will stop printing anyway. 
++ */ ++__maybe_unused ++static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) ++{ ++ unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); ++ struct console *con = ctxt->console; ++ ++ if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, ++ __seq_to_nbcon_seq(new_seq))) { ++ ctxt->seq = new_seq; ++ } else { ++ ctxt->seq = nbcon_seq_read(con); ++ } ++} ++ + /** + * nbcon_context_try_acquire_direct - Try to acquire directly + * @ctxt: The context of the caller +@@ -510,6 +605,9 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) + else + ctxt->pbufs = con->pbufs; -+config PREEMPT_AUTO -+ bool "Automagic preemption mode with runtime tweaking support" -+ depends on HAVE_PREEMPT_AUTO -+ select PREEMPT_BUILD_AUTO -+ help -+ Add some sensible blurb here ++ /* Set the record sequence for this context to print. */ ++ ctxt->seq = nbcon_seq_read(ctxt->console); + - config PREEMPT_RT - bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT -+ select PREEMPT_BUILD_AUTO if HAVE_PREEMPT_AUTO - select PREEMPTION - help - This option turns the kernel into a real-time kernel by replacing -@@ -95,7 +110,7 @@ config PREEMPTION + return true; + } - config PREEMPT_DYNAMIC - bool "Preemption behaviour defined on boot" -- depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT -+ depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO - select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY - select PREEMPT_BUILD - default y if HAVE_PREEMPT_DYNAMIC_CALL -diff --git a/kernel/entry/common.c b/kernel/entry/common.c -index 5ff4f1cd3..fd42f0b17 100644 ---- a/kernel/entry/common.c -+++ b/kernel/entry/common.c -@@ -161,7 +161,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, +@@ -722,6 +820,8 @@ bool nbcon_alloc(struct console *con) + * + * nbcon_alloc() *must* be called and succeed before this function + * is called. ++ * ++ * This function expects that the legacy @con->seq has been set. + */ + void nbcon_init(struct console *con) + { +@@ -730,6 +830,7 @@ void nbcon_init(struct console *con) + /* nbcon_alloc() must have been called and successful! */ + BUG_ON(!con->pbufs); - local_irq_enable_exit_to_user(ti_work); ++ nbcon_seq_force(con, con->seq); + nbcon_state_set(con, &state); + } -- if (ti_work & _TIF_NEED_RESCHED) -+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index c716841c2ef5..d6322d91612e 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -494,7 +494,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, - if (ti_work & _TIF_UPROBE) -@@ -391,7 +391,7 @@ void raw_irqentry_exit_cond_resched(void) - rcu_irq_exit_check_preempt(); - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) - WARN_ON_ONCE(!on_thread_stack()); -- if (need_resched()) -+ if (test_tsk_need_resched(current)) - preempt_schedule_irq(); + static struct printk_ringbuffer printk_rb_dynamic; + +-static struct printk_ringbuffer *prb = &printk_rb_static; ++struct printk_ringbuffer *prb = &printk_rb_static; + + /* + * We cannot access per-CPU data (e.g. 
per-CPU flush irq_work) before +@@ -3212,6 +3212,7 @@ void console_flush_on_panic(enum con_flush_mode mode) + + if (mode == CONSOLE_REPLAY_ALL) { + struct console *c; ++ short flags; + int cookie; + u64 seq; + +@@ -3219,11 +3220,17 @@ void console_flush_on_panic(enum con_flush_mode mode) + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { +- /* +- * This is an unsynchronized assignment, but the +- * kernel is in "hope and pray" mode anyway. +- */ +- c->seq = seq; ++ flags = console_srcu_read_flags(c); ++ ++ if (flags & CON_NBCON) { ++ nbcon_seq_force(c, seq); ++ } else { ++ /* ++ * This is an unsynchronized assignment. On ++ * panic legacy consoles are only best effort. ++ */ ++ c->seq = seq; ++ } + } + console_srcu_read_unlock(cookie); } - } -diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c -index 2e0f75bcb..5253d3d2d 100644 ---- a/kernel/entry/kvm.c -+++ b/kernel/entry/kvm.c -@@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) - return -EINTR; +@@ -3809,6 +3816,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + struct console *c; + u64 last_diff = 0; + u64 printk_seq; ++ short flags; + int cookie; + u64 diff; + u64 seq; +@@ -3836,6 +3844,9 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + for_each_console_srcu(c) { + if (con && con != c) + continue; ++ ++ flags = console_srcu_read_flags(c); ++ + /* + * If consoles are not usable, it cannot be expected + * that they make forward progress, so only increment +@@ -3843,7 +3854,13 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + */ + if (!console_is_usable(c)) + continue; +- printk_seq = c->seq; ++ ++ if (flags & CON_NBCON) { ++ printk_seq = nbcon_seq_read(c); ++ } else { ++ printk_seq = c->seq; ++ } ++ + if (printk_seq < seq) + diff += seq - printk_seq; } +-- +2.51.0 + +From 6b36d52eefd1aef5c2f73ecd6688fd107c0365d6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:06 +0206 +Subject: [PATCH 106/213] printk: nbcon: Add emit function and callback + function for atomic printing + +Implement an emit function for nbcon consoles to output printk +messages. It utilizes the lockless printk_get_next_message() and +console_prepend_dropped() functions to retrieve/build the output +message. The emit function includes the required safety points to +check for handover/takeover and calls a new write_atomic callback +of the console driver to output the message. It also includes +proper handling for updating the nbcon console sequence number. + +A new nbcon_write_context struct is introduced. This is provided +to the write_atomic callback and includes only the information +necessary for performing atomic writes. 
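+
+As an editor's illustration (not part of the series), a minimal
+write_atomic() callback honoring the contract described above could look
+as follows. my_uart_putc() is a hypothetical polled output primitive; a
+real driver would also mark its unsafe register accesses (see the
+follow-up patch adding nbcon_enter_unsafe()/nbcon_exit_unsafe()):
+
+	/* Hypothetical polled output primitive of the device. */
+	static void my_uart_putc(char c);
+
+	static bool my_uart_write_atomic(struct console *con,
+					 struct nbcon_write_context *wctxt)
+	{
+		unsigned int i;
+
+		/* @outbuf and @len are prepared by the nbcon emit code. */
+		for (i = 0; i < wctxt->len; i++)
+			my_uart_putc(wctxt->outbuf[i]);
+
+		/* Report that the record was fully emitted. */
+		return true;
+	}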
+ +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-8-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 21 ++++++++ + kernel/printk/internal.h | 6 +++ + kernel/printk/nbcon.c | 106 ++++++++++++++++++++++++++++++++++++++- + kernel/printk/printk.c | 9 ++-- + 4 files changed, 134 insertions(+), 8 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 8a326fd9085b..740c94fad68b 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -247,6 +247,7 @@ struct printk_buffers; + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. ++ * @backlog: Ringbuffer has pending records + * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context + */ +@@ -257,11 +258,28 @@ struct nbcon_context { + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; -- if (ti_work & _TIF_NEED_RESCHED) -+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); ++ /* members set by emit */ ++ unsigned int backlog : 1; ++ + /* members set by acquire */ + struct printk_buffers *pbufs; + u64 seq; + }; - if (ti_work & _TIF_NOTIFY_RESUME) -diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c -index ce2889f12..d636a1bbd 100644 ---- a/kernel/futex/pi.c -+++ b/kernel/futex/pi.c -@@ -1,6 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0-or-later ++/** ++ * struct nbcon_write_context - Context handed to the nbcon write callbacks ++ * @ctxt: The core console context ++ * @outbuf: Pointer to the text buffer for output ++ * @len: Length to write ++ * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred ++ */ ++struct nbcon_write_context { ++ struct nbcon_context __private ctxt; ++ char *outbuf; ++ unsigned int len; ++ bool unsafe_takeover; ++}; ++ + /** + * struct console - The console descriptor structure + * @name: The name of the console driver +@@ -282,6 +300,7 @@ struct nbcon_context { + * @data: Driver private data + * @node: hlist node for the console list + * ++ * @write_atomic: Write callback for atomic context + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer +@@ -306,6 +325,8 @@ struct console { + struct hlist_node node; - #include -+#include - #include + /* nbcon console specific members */ ++ bool (*write_atomic)(struct console *con, ++ struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 6473f5ae4a18..6c2afee5ef62 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -130,3 +130,9 @@ struct printk_message { + }; - #include "futex.h" -@@ -610,29 +611,16 @@ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, - /* - * Caller must hold a reference on @pi_state. 
+ bool other_cpu_in_panic(void); ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_supress); ++ ++#ifdef CONFIG_PRINTK ++void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); ++#endif +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index e076096b31c0..6e05d263fd22 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -221,7 +221,6 @@ void nbcon_seq_force(struct console *con, u64 seq) + * nbcon_seq_force() was used or the current context no longer owns the + * console. In the later case, it will stop printing anyway. */ --static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) -+static int wake_futex_pi(u32 __user *uaddr, u32 uval, -+ struct futex_pi_state *pi_state, -+ struct rt_mutex_waiter *top_waiter) +-__maybe_unused + static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { -- struct rt_mutex_waiter *top_waiter; - struct task_struct *new_owner; - bool postunlock = false; - DEFINE_RT_WAKE_Q(wqh); - u32 curval, newval; - int ret = 0; - -- top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); -- if (WARN_ON_ONCE(!top_waiter)) { -- /* -- * As per the comment in futex_unlock_pi() this should not happen. -- * -- * When this happens, give up our locks and try again, giving -- * the futex_lock_pi() instance time to complete, either by -- * waiting on the rtmutex or removing itself from the futex -- * queue. -- */ -- ret = -EAGAIN; -- goto out_unlock; -- } -- - new_owner = top_waiter->task; - - /* -@@ -1002,6 +990,12 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl - goto no_block; - } + unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); +@@ -755,7 +754,6 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s + * + * Internal helper to avoid duplicated code. + */ +-__maybe_unused + static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) + { + struct console *con = ctxt->console; +@@ -784,6 +782,110 @@ static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsaf + return nbcon_context_can_proceed(ctxt, &cur); + } ++/** ++ * nbcon_emit_next_record - Emit a record in the acquired context ++ * @wctxt: The write context that will be handed to the write function ++ * ++ * Return: True if this context still owns the console. False if ++ * ownership was handed over or taken. ++ * ++ * When this function returns false then the calling context no longer owns ++ * the console and is no longer allowed to go forward. In this case it must ++ * back out immediately and carefully. The buffer content is also no longer ++ * trusted since it no longer belongs to the calling context. If the caller ++ * wants to do more it must reacquire the console first. 
++ * ++ * When true is returned, @wctxt->ctxt.backlog indicates whether there are ++ * still records pending in the ringbuffer, ++ */ ++__maybe_unused ++static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) ++{ ++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; ++ struct printk_message pmsg = { ++ .pbufs = ctxt->pbufs, ++ }; ++ unsigned long con_dropped; ++ struct nbcon_state cur; ++ unsigned long dropped; ++ bool done; ++ + /* -+ * Must be done before we enqueue the waiter, here is unfortunately -+ * under the hb lock, but that *should* work because it does nothing. ++ * The printk buffers are filled within an unsafe section. This ++ * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from ++ * clobbering each other. ++ */ ++ ++ if (!nbcon_context_enter_unsafe(ctxt)) ++ return false; ++ ++ ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); ++ if (!ctxt->backlog) ++ return nbcon_context_exit_unsafe(ctxt); ++ ++ /* ++ * @con->dropped is not protected in case of an unsafe hostile ++ * takeover. In that situation the update can be racy so ++ * annotate it accordingly. + */ -+ rt_mutex_pre_schedule(); ++ con_dropped = data_race(READ_ONCE(con->dropped)); ++ ++ dropped = con_dropped + pmsg.dropped; ++ if (dropped && !is_extended) ++ console_prepend_dropped(&pmsg, dropped); ++ ++ if (!nbcon_context_exit_unsafe(ctxt)) ++ return false; ++ ++ /* For skipped records just update seq/dropped in @con. */ ++ if (pmsg.outbuf_len == 0) ++ goto update_con; ++ ++ /* Initialize the write context for driver callbacks. */ ++ wctxt->outbuf = &pmsg.pbufs->outbuf[0]; ++ wctxt->len = pmsg.outbuf_len; ++ nbcon_state_read(con, &cur); ++ wctxt->unsafe_takeover = cur.unsafe_takeover; ++ ++ if (con->write_atomic) { ++ done = con->write_atomic(con, wctxt); ++ } else { ++ nbcon_context_release(ctxt); ++ WARN_ON_ONCE(1); ++ done = false; ++ } ++ ++ /* If not done, the emit was aborted. */ ++ if (!done) ++ return false; + - rt_mutex_init_waiter(&rt_waiter); - - /* -@@ -1039,19 +1033,37 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl - ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); - - cleanup: -- spin_lock(q.lock_ptr); - /* - * If we failed to acquire the lock (deadlock/signal/timeout), we must -- * first acquire the hb->lock before removing the lock from the -- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait -- * lists consistent. -+ * must unwind the above, however we canont lock hb->lock because -+ * rt_mutex already has a waiter enqueued and hb->lock can itself try -+ * and enqueue an rt_waiter through rtlock. -+ * -+ * Doing the cleanup without holding hb->lock can cause inconsistent -+ * state between hb and pi_state, but only in the direction of not -+ * seeing a waiter that is leaving. -+ * -+ * See futex_unlock_pi(), it deals with this inconsistency. -+ * -+ * There be dragons here, since we must deal with the inconsistency on -+ * the way out (here), it is impossible to detect/warn about the race -+ * the other way around (missing an incoming waiter). - * -- * In particular; it is important that futex_unlock_pi() can not -- * observe this inconsistency. -+ * What could possibly go wrong... 
- */ - if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) - ret = 0; - + /* -+ * Now that the rt_waiter has been dequeued, it is safe to use -+ * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up -+ * the ++ * Since any dropped message was successfully output, reset the ++ * dropped count for the console. + */ -+ spin_lock(q.lock_ptr); ++ dropped = 0; ++update_con: + /* -+ * Waiter is unqueued. ++ * The dropped count and the sequence number are updated within an ++ * unsafe section. This limits update races to the panic context and ++ * allows the panic context to win. + */ -+ rt_mutex_post_schedule(); - no_block: - /* - * Fixup the pi_state owner and possibly acquire the lock if we -@@ -1132,6 +1144,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) - top_waiter = futex_top_waiter(hb, &key); - if (top_waiter) { - struct futex_pi_state *pi_state = top_waiter->pi_state; -+ struct rt_mutex_waiter *rt_waiter; - - ret = -EINVAL; - if (!pi_state) -@@ -1144,22 +1157,39 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) - if (pi_state->owner != current) - goto out_unlock; - -- get_pi_state(pi_state); - /* - * By taking wait_lock while still holding hb->lock, we ensure -- * there is no point where we hold neither; and therefore -- * wake_futex_p() must observe a state consistent with what we -- * observed. -+ * there is no point where we hold neither; and thereby -+ * wake_futex_pi() must observe any new waiters. -+ * -+ * Since the cleanup: case in futex_lock_pi() removes the -+ * rt_waiter without holding hb->lock, it is possible for -+ * wake_futex_pi() to not find a waiter while the above does, -+ * in this case the waiter is on the way out and it can be -+ * ignored. - * - * In particular; this forces __rt_mutex_start_proxy() to - * complete such that we're guaranteed to observe the -- * rt_waiter. Also see the WARN in wake_futex_pi(). -+ * rt_waiter. - */ - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + -+ /* -+ * Futex vs rt_mutex waiter state -- if there are no rt_mutex -+ * waiters even though futex thinks there are, then the waiter -+ * is leaving and the uncontended path is safe to take. -+ */ -+ rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); -+ if (!rt_waiter) { -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -+ goto do_uncontended; -+ } ++ if (!nbcon_context_enter_unsafe(ctxt)) ++ return false; + -+ get_pi_state(pi_state); - spin_unlock(&hb->lock); - - /* drops pi_state->pi_mutex.wait_lock */ -- ret = wake_futex_pi(uaddr, uval, pi_state); -+ ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter); - - put_pi_state(pi_state); - -@@ -1187,6 +1217,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) - return ret; - } - -+do_uncontended: - /* - * We have no kernel internal state, i.e. no waiters in the - * kernel. Waiters which are about to queue themselves are stuck -diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c -index cba8b1a6a..4c73e0b81 100644 ---- a/kernel/futex/requeue.c -+++ b/kernel/futex/requeue.c -@@ -850,11 +850,13 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, - pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - -- /* Current is not longer pi_blocked_on */ -- spin_lock(q.lock_ptr); -+ /* -+ * See futex_unlock_pi()'s cleanup: comment. 
-+ */ - if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) - ret = 0; - -+ spin_lock(q.lock_ptr); - debug_rt_mutex_free_waiter(&rt_waiter); - /* - * Fixup the pi_state owner and possibly acquire the lock if we -diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c -index 1d4bc493b..486c68c11 100644 ---- a/kernel/ksysfs.c -+++ b/kernel/ksysfs.c -@@ -179,6 +179,15 @@ KERNEL_ATTR_RO(crash_elfcorehdr_size); - - #endif /* CONFIG_CRASH_CORE */ - -+#if defined(CONFIG_PREEMPT_RT) -+static ssize_t realtime_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%d\n", 1); ++ if (dropped != con_dropped) { ++ /* Counterpart to the READ_ONCE() above. */ ++ WRITE_ONCE(con->dropped, dropped); ++ } ++ ++ nbcon_seq_try_update(ctxt, pmsg.seq + 1); ++ ++ return nbcon_context_exit_unsafe(ctxt); +} -+KERNEL_ATTR_RO(realtime); -+#endif + - /* whether file capabilities are enabled */ - static ssize_t fscaps_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -@@ -274,6 +283,9 @@ static struct attribute * kernel_attrs[] = { - #ifndef CONFIG_TINY_RCU - &rcu_expedited_attr.attr, - &rcu_normal_attr.attr, -+#endif -+#ifdef CONFIG_PREEMPT_RT -+ &realtime_attr.attr, - #endif - NULL - }; -diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c -index 3468d8230..6eef8527e 100644 ---- a/kernel/locking/lockdep.c -+++ b/kernel/locking/lockdep.c -@@ -56,6 +56,7 @@ - #include - #include - #include -+#include - - #include - -@@ -3971,6 +3972,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, - if (!debug_locks_off() || debug_locks_silent) - return; + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index d6322d91612e..4a99303728e1 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -698,9 +698,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, + return len; + } -+ nbcon_cpu_emergency_enter(); -+ - pr_warn("\n"); - pr_warn("================================\n"); - pr_warn("WARNING: inconsistent lock state\n"); -@@ -3999,6 +4002,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_supress); +- + /* /dev/kmsg - userspace message inject/listen interface */ + struct devkmsg_user { + atomic64_t seq; +@@ -2767,7 +2764,7 @@ static void __console_unlock(void) + * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. + */ + #ifdef CONFIG_PRINTK +-static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) ++void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) + { + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); +@@ -2821,8 +2818,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d + * of @pmsg are valid. (See the documentation of struct printk_message + * for information about the @pmsg fields.) 
+ */ +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_suppress) ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_suppress) + { + static int panic_console_dropped; + +-- +2.51.0 + +From d1a04cf2d6404273afc77571c211ca44bb3f50f4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 16 Sep 2023 21:26:07 +0206 +Subject: [PATCH 107/213] printk: nbcon: Allow drivers to mark unsafe regions + and check state + +For the write_atomic callback, the console driver may have unsafe +regions that need to be appropriately marked. Provide functions +that accept the nbcon_write_context struct to allow for the driver +to enter and exit unsafe regions. + +Also provide a function for drivers to check if they are still the +owner of the console. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230916192007.608398-9-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 10 ++++++ + kernel/printk/nbcon.c | 75 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 85 insertions(+) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 740c94fad68b..69242c2593d2 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -456,6 +456,16 @@ static inline bool console_is_registered(const struct console *con) + lockdep_assert_console_list_lock_held(); \ + hlist_for_each_entry(con, &console_list, node) - pr_warn("\nstack backtrace:\n"); - dump_stack(); ++#ifdef CONFIG_PRINTK ++extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); ++extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); ++extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); ++#else ++static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } ++static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } ++static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } ++#endif + -+ nbcon_cpu_emergency_exit(); - } + extern int console_set_on_cmdline; + extern struct console *early_console; - /* -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index bf3a28ee7..99129c891 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, - return try_cmpxchg_acquire(&lock->owner, &old, new); +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 6e05d263fd22..b96077152f49 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -732,6 +732,41 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s + return false; } -+static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) ++/** ++ * nbcon_can_proceed - Check whether ownership can proceed ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Return: True if this context still owns the console. False if ++ * ownership was handed over or taken. ++ * ++ * It is used in nbcon_enter_unsafe() to make sure that it still owns the ++ * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock ++ * for a higher priority context which asked for the friendly handover. 
++ *
++ * It can be called inside an unsafe section when the console is just
++ * temporarily in a safe state instead of exiting and entering the unsafe state.
++ *
++ * Also it can be called in the safe context before doing an expensive safe
++ * operation. It does not make sense to do the operation when a higher
++ * priority context took the lock.
++ *
++ * When this function returns false then the calling context no longer owns
++ * the console and is no longer allowed to go forward. In this case it must
++ * back out immediately and carefully. The buffer content is also no longer
++ * trusted since it no longer belongs to the calling context.
++ */
++bool nbcon_can_proceed(struct nbcon_write_context *wctxt)
+{
++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
++ struct console *con = ctxt->console;
++ struct nbcon_state cur;
++
++ nbcon_state_read(con, &cur);
++
++ return nbcon_context_can_proceed(ctxt, &cur);
+}
++EXPORT_SYMBOL_GPL(nbcon_can_proceed);
+
+ #define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true)
+ #define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false)
+@@ -782,6 +817,46 @@ static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsaf
+ return nbcon_context_can_proceed(ctxt, &cur);
+ }
++/**
++ * nbcon_enter_unsafe - Enter an unsafe region in the driver
++ * @wctxt: The write context that was handed to the write function
++ *
++ * Return: True if this context still owns the console. False if
++ * ownership was handed over or taken.
++ *
++ * When this function returns false then the calling context no longer owns
++ * the console and is no longer allowed to go forward. In this case it must
++ * back out immediately and carefully. The buffer content is also no longer
++ * trusted since it no longer belongs to the calling context.
++ */
++bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt)
++{
++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
++
++ return nbcon_context_enter_unsafe(ctxt);
++}
++EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);
++
++/**
++ * nbcon_exit_unsafe - Exit an unsafe region in the driver
++ * @wctxt: The write context that was handed to the write function
++ *
++ * Return: True if this context still owns the console. False if
++ * ownership was handed over or taken.
++ *
++ * When this function returns false then the calling context no longer owns
++ * the console and is no longer allowed to go forward. In this case it must
++ * back out immediately and carefully. The buffer content is also no longer
++ * trusted since it no longer belongs to the calling context.
-+ */ -+ return rt_mutex_slowtrylock(lock); ++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ return nbcon_context_exit_unsafe(ctxt); +} ++EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + - static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) -@@ -1613,7 +1632,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, - raw_spin_unlock_irq(&lock->wait_lock); - - if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) -- schedule(); -+ rt_mutex_schedule(); - - raw_spin_lock_irq(&lock->wait_lock); - set_current_state(state); -@@ -1643,7 +1662,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, - - while (1) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule(); -+ rt_mutex_schedule(); - } - } + /** + * nbcon_emit_next_record - Emit a record in the acquired context + * @wctxt: The write context that will be handed to the write function +-- +2.51.0 + +From 98f9ecc28513bba800567b9efc8bcd27ddffa3f7 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 20 Sep 2023 17:58:38 +0206 +Subject: [PATCH 108/213] printk: fix illegal pbufs access for !CONFIG_PRINTK + +When CONFIG_PRINTK is not set, PRINTK_MESSAGE_MAX is 0. This +leads to a zero-sized array @outbuf in @printk_shared_pbufs. In +console_flush_all() a pointer to the first element of the array +is assigned with: + + char *outbuf = &printk_shared_pbufs.outbuf[0]; + +For !CONFIG_PRINTK this leads to a compiler warning: + + warning: array subscript 0 is outside array bounds of + 'char[0]' [-Warray-bounds] + +This is not really dangerous because printk_get_next_message() +always returns false for !CONFIG_PRINTK, which leads to @outbuf +never being used. However, it makes no sense to even compile +these functions for !CONFIG_PRINTK. + +Extend the existing '#ifdef CONFIG_PRINTK' block to contain +the formatting and emitting functions since these have no +purpose in !CONFIG_PRINTK. This also allows removing several +more !CONFIG_PRINTK dummies as well as moving +@suppress_panic_printk into a CONFIG_PRINTK block. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202309201724.M9BMAQIh-lkp@intel.com/ +Signed-off-by: John Ogness +Reviewed-by: Sergey Senozhatsky +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230920155238.670439-1-john.ogness@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 44 +++++++++++++++++------------------------- + 1 file changed, 18 insertions(+), 26 deletions(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 4a99303728e1..d41b34f88515 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -102,12 +102,6 @@ DEFINE_STATIC_SRCU(console_srcu); + */ + int __read_mostly suppress_printk; -@@ -1738,6 +1757,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, - unsigned long flags; - int ret; +-/* +- * During panic, heavy printk by other CPUs can delay the +- * panic and risk deadlock on console resources. 
+- */ +-static int __read_mostly suppress_panic_printk; +- + #ifdef CONFIG_LOCKDEP + static struct lockdep_map console_lock_dep_map = { + .name = "console_lock" +@@ -445,6 +439,12 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + static DEFINE_MUTEX(syslog_lock); -+ /* -+ * Do all pre-schedule work here, before we queue a waiter and invoke -+ * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would -+ * otherwise recurse back into task_blocks_on_rt_mutex() through -+ * rtlock_slowlock() and will then enqueue a second waiter for this -+ * same task and things get really confusing real fast. -+ */ -+ rt_mutex_pre_schedule(); + #ifdef CONFIG_PRINTK ++/* ++ * During panic, heavy printk by other CPUs can delay the ++ * panic and risk deadlock on console resources. ++ */ ++static int __read_mostly suppress_panic_printk; + - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can - * be called in early boot if the cmpxchg() fast path is disabled -@@ -1749,6 +1777,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, - raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ rt_mutex_post_schedule(); + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ +@@ -2380,22 +2380,6 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre - return ret; - } -@@ -1756,7 +1785,9 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, - static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, - unsigned int state) - { -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ lockdep_assert(!current->pi_blocked_on); -+ -+ if (likely(rt_mutex_try_acquire(lock))) - return 0; + static u64 syslog_seq; - return rt_mutex_slowlock(lock, NULL, state); -diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c -index 25ec02394..34a59569d 100644 ---- a/kernel/locking/rwbase_rt.c -+++ b/kernel/locking/rwbase_rt.c -@@ -71,6 +71,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, - struct rt_mutex_base *rtm = &rwb->rtmutex; - int ret; +-static size_t record_print_text(const struct printk_record *r, +- bool syslog, bool time) +-{ +- return 0; +-} +-static ssize_t info_print_ext_header(char *buf, size_t size, +- struct printk_info *info) +-{ +- return 0; +-} +-static ssize_t msg_print_ext_body(char *buf, size_t size, +- char *text, size_t text_len, +- struct dev_printk_info *dev_info) { return 0; } +-static void console_lock_spinning_enable(void) { } +-static int console_lock_spinning_disable_and_check(int cookie) { return 0; } +-static bool suppress_message_printing(int level) { return false; } + static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } -+ rwbase_pre_schedule(); - raw_spin_lock_irq(&rtm->wait_lock); +@@ -2749,6 +2733,8 @@ static void __console_unlock(void) + up_console_sem(); + } - /* -@@ -125,12 +126,15 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, - rwbase_rtmutex_unlock(rtm); ++#ifdef CONFIG_PRINTK ++ + /* + * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". 
This
+ * is achieved by shifting the existing message over and inserting the dropped
+@@ -2763,7 +2749,6 @@
+ *
+ * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
+ */
+-#ifdef CONFIG_PRINTK
+ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
+ {
+ struct printk_buffers *pbufs = pmsg->pbufs;
+@@ -2795,9 +2780,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
+ memcpy(outbuf, scratchbuf, len);
+ pmsg->outbuf_len += len;
+ }
+-#else
+-#define console_prepend_dropped(pmsg, dropped)
+-#endif /* CONFIG_PRINTK */
+
+ /*
+ * Read and format the specified record (or a later record if the specified
+@@ -2955,6 +2937,16 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co
+ return true;
+ }
+
++#else
++
++static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
++{
++ *handover = false;
++ return false;
++}
++
++#endif /* CONFIG_PRINTK */
++
+ /*
+ * Print out all remaining records to all consoles.
+ *
+--
+2.51.0
+
+From 8358a76fef4b7f8df419a801654a8391809b855a Mon Sep 17 00:00:00 2001
+From: Petr Mladek
+Date: Fri, 6 Oct 2023 10:21:51 +0200
+Subject: [PATCH 109/213] printk: Reduce pr_flush() polling time
+
+pr_flush() does not guarantee that all messages would really get flushed
+to the console. The best it could do is to wait with a given timeout.[*]
+
+The current interval 100ms for checking the progress might seem too
+long in some situations. For example, such delays are not appreciated
+during suspend and resume especially when the consoles have been flushed
+a "long" time before the check.
+
+On the other hand, the sleeping wait might be useful in other situations.
+Especially, it would allow flushing the messages using printk kthreads
+on the same CPU[*].
+
+Use msleep(1) as a compromise.
+
+Also measure the time using jiffies. msleep() does not guarantee
+precise wakeup after the given delay. It might be much longer,
+especially for times < 20ms. See Documentation/timers/timers-howto.rst
+for more details.
+
+Note that msecs_to_jiffies() already translates a negative value into
+an infinite timeout.
+
+[*] console_unlock() does not guarantee flushing the consoles since
+ the commit dbdda842fe96f893 ("printk: Add console owner and waiter
+ logic to load balance console writes").
+
+ It would be possible to guarantee it another way. For example,
+ the spinning might be enabled only when the console_lock has been
+ taken via console_trylock().
+
+ But the load balancing is helpful. And more importantly, the flush
+ with a timeout has been added as a preparation step for introducing
+ printk kthreads.
+
+Signed-off-by: Petr Mladek
+Reviewed-by: John Ogness
+Link: https://lore.kernel.org/r/20231006082151.6969-3-pmladek@suse.com
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/printk/printk.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
+index d41b34f88515..1a3f8dd56fe0 100644
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -3801,7 +3801,8 @@ late_initcall(printk_late_init);
+ /* If @con is specified, only wait for that console. Otherwise wait for all.
*/ + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { -+ lockdep_assert(!current->pi_blocked_on); +- int remaining = timeout_ms; ++ unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); ++ unsigned long remaining_jiffies = timeout_jiffies; + struct console *c; + u64 last_diff = 0; + u64 printk_seq; +@@ -3819,6 +3820,9 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + console_unlock(); + + for (;;) { ++ unsigned long begin_jiffies; ++ unsigned long slept_jiffies; + - if (rwbase_read_trylock(rwb)) - return 0; + diff = 0; + + /* +@@ -3856,24 +3860,20 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + console_srcu_read_unlock(cookie); + + if (diff != last_diff && reset_on_progress) +- remaining = timeout_ms; ++ remaining_jiffies = timeout_jiffies; -@@ -237,6 +241,8 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, - /* Force readers into slow path */ - atomic_sub(READER_BIAS, &rwb->readers); + console_unlock(); -+ rwbase_pre_schedule(); + /* Note: @diff is 0 if there are no usable consoles. */ +- if (diff == 0 || remaining == 0) ++ if (diff == 0 || remaining_jiffies == 0) + break; + +- if (remaining < 0) { +- /* no timeout limit */ +- msleep(100); +- } else if (remaining < 100) { +- msleep(remaining); +- remaining = 0; +- } else { +- msleep(100); +- remaining -= 100; +- } ++ /* msleep(1) might sleep much longer. Check time by jiffies. */ ++ begin_jiffies = jiffies; ++ msleep(1); ++ slept_jiffies = jiffies - begin_jiffies; + - raw_spin_lock_irqsave(&rtm->wait_lock, flags); - if (__rwbase_write_trylock(rwb)) - goto out_unlock; -@@ -248,6 +254,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, - if (rwbase_signal_pending_state(state, current)) { - rwbase_restore_current_state(); - __rwbase_write_unlock(rwb, 0, flags); -+ rwbase_post_schedule(); - trace_contention_end(rwb, -EINTR); - return -EINTR; - } -@@ -266,6 +273,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, ++ remaining_jiffies -= min(slept_jiffies, remaining_jiffies); - out_unlock: - raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); -+ rwbase_post_schedule(); - return 0; + last_diff = diff; + } +-- +2.51.0 + +From 02aaa18964dcbc06bc1f22b4dbf303a1a4679c1b Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 6 Dec 2023 12:01:56 +0000 +Subject: [PATCH 110/213] printk: nbcon: Relocate 32bit seq macros + +The macros __seq_to_nbcon_seq() and __nbcon_seq_to_seq() are +used to provide support for atomic handling of sequence numbers +on 32bit systems. Until now this was only used by nbcon.c, +which is why they were located in nbcon.c and include nbcon in +the name. + +In a follow-up commit this functionality is also needed by +printk_ringbuffer. Rather than duplicating the functionality, +relocate the macros to printk_ringbuffer.h. + +Also, since the macros will be no longer nbcon-specific, rename +them to __u64seq_to_ulseq() and __ulseq_to_u64seq(). + +This does not result in any functional change. 
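The effect of the two macros is easiest to see with concrete values. Below is a minimal stand-alone sketch (plain C; the fixed ref_seq value is a hypothetical stand-in for the 64-bit reference that prb_next_seq() provides in the kernel): only the low 32 bits of a sequence number are stored, and the reference supplies the upper bits when the value is read back.

	#include <stdio.h>
	#include <stdint.h>

	/* Hypothetical stand-in for the 64-bit reference (prb_next_seq()). */
	static const uint64_t ref_seq = 0x220000000ULL;

	/* Only the low 32 bits fit in an atomic_long_t on 32-bit systems. */
	static uint32_t u64seq_to_ulseq(uint64_t seq)
	{
		return (uint32_t)seq;
	}

	/* Expand back to 64 bits by folding against the reference. */
	static uint64_t ulseq_to_u64seq(uint32_t ulseq)
	{
		return ref_seq - (uint32_t)(ref_seq - ulseq);
	}

	int main(void)
	{
		uint64_t seq = 0x21fffffffULL;	/* slightly below the reference */
		uint32_t stored = u64seq_to_ulseq(seq);

		printf("stored:   %#lx\n", (unsigned long)stored);
		printf("restored: %#llx\n",
		       (unsigned long long)ulseq_to_u64seq(stored));
		return 0;
	}

A value more than 2^32 below the reference, or anything above it, cannot be represented by this fold; the following patch adjusts the mapping so the window extends roughly 2^31 records to either side of the reference.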
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/nbcon.c | 41 +++---------------------------- + kernel/printk/printk_ringbuffer.h | 33 +++++++++++++++++++++++++ + 2 files changed, 37 insertions(+), 37 deletions(-) + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index b96077152f49..c8093bcc01fe 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -140,39 +140,6 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta + return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index 11ed7ce65..62eac9fd8 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -1427,8 +1427,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) - #define rwbase_signal_pending_state(state, current) \ - signal_pending_state(state, current) +-#ifdef CONFIG_64BIT +- +-#define __seq_to_nbcon_seq(seq) (seq) +-#define __nbcon_seq_to_seq(seq) (seq) +- +-#else /* CONFIG_64BIT */ +- +-#define __seq_to_nbcon_seq(seq) ((u32)seq) +- +-static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) +-{ +- u64 seq; +- u64 rb_next_seq; +- +- /* +- * The provided sequence is only the lower 32 bits of the ringbuffer +- * sequence. It needs to be expanded to 64bit. Get the next sequence +- * number from the ringbuffer and fold it. +- * +- * Having a 32bit representation in the console is sufficient. +- * If a console ever gets more than 2^31 records behind +- * the ringbuffer then this is the least of the problems. +- * +- * Also the access to the ring buffer is always safe. +- */ +- rb_next_seq = prb_next_seq(prb); +- seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); +- +- return seq; +-} +- +-#endif /* CONFIG_64BIT */ +- + /** + * nbcon_seq_read - Read the current console sequence + * @con: Console to read the sequence of +@@ -183,7 +150,7 @@ u64 nbcon_seq_read(struct console *con) + { + unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); -+#define rwbase_pre_schedule() \ -+ rt_mutex_pre_schedule() -+ - #define rwbase_schedule() \ -- schedule() -+ rt_mutex_schedule() -+ -+#define rwbase_post_schedule() \ -+ rt_mutex_post_schedule() +- return __nbcon_seq_to_seq(nbcon_seq); ++ return __ulseq_to_u64seq(prb, nbcon_seq); + } - #include "rwbase_rt.c" + /** +@@ -204,7 +171,7 @@ void nbcon_seq_force(struct console *con, u64 seq) + */ + u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); -diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c -index 48a19ed84..38e292454 100644 ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -37,6 +37,8 @@ +- atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); ++ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); - static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) + /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. 
*/ + con->seq = 0; +@@ -223,11 +190,11 @@ void nbcon_seq_force(struct console *con, u64 seq) + */ + static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { -+ lockdep_assert(!current->pi_blocked_on); -+ - if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) - rtlock_slowlock(rtm); - } -@@ -184,9 +186,13 @@ static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm) +- unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); ++ unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); + struct console *con = ctxt->console; - #define rwbase_signal_pending_state(state, current) (0) + if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, +- __seq_to_nbcon_seq(new_seq))) { ++ __u64seq_to_ulseq(new_seq))) { + ctxt->seq = new_seq; + } else { + ctxt->seq = nbcon_seq_read(con); +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index 18cd25e489b8..b82a96dc2ea2 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -381,4 +381,37 @@ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, + u64 prb_first_valid_seq(struct printk_ringbuffer *rb); + u64 prb_next_seq(struct printk_ringbuffer *rb); -+#define rwbase_pre_schedule() ++#ifdef CONFIG_64BIT + - #define rwbase_schedule() \ - schedule_rtlock() - -+#define rwbase_post_schedule() ++#define __u64seq_to_ulseq(u64seq) (u64seq) ++#define __ulseq_to_u64seq(rb, ulseq) (ulseq) + - #include "rwbase_rt.c" - /* - * The common functions which get wrapped into the rwlock API. -diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c -index d1473c624..c7196de83 100644 ---- a/kernel/locking/ww_rt_mutex.c -+++ b/kernel/locking/ww_rt_mutex.c -@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, - } - mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); - -- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { -+ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { - if (ww_ctx) - ww_mutex_set_context_fastpath(lock, ww_ctx); - return 0; -diff --git a/kernel/panic.c b/kernel/panic.c -index ef9f9a4e9..9215df21d 100644 ---- a/kernel/panic.c -+++ b/kernel/panic.c -@@ -366,6 +366,8 @@ void panic(const char *fmt, ...) - */ - atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - -+ printk_legacy_allow_panic_sync(); ++#else /* CONFIG_64BIT */ + - panic_print_sys_info(false); - - kmsg_dump(KMSG_DUMP_PANIC); -@@ -449,6 +451,7 @@ void panic(const char *fmt, ...) - * Explicitly flush the kernel log buffer one last time. ++#define __u64seq_to_ulseq(u64seq) ((u32)u64seq) ++ ++static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) ++{ ++ u64 seq; ++ u64 rb_next_seq; ++ ++ /* ++ * The provided sequence is only the lower 32 bits of the ringbuffer ++ * sequence. It needs to be expanded to 64bit. Get the next sequence ++ * number from the ringbuffer and fold it. ++ * ++ * Having a 32bit representation in the console is sufficient. ++ * If a console ever gets more than 2^31 records behind ++ * the ringbuffer then this is the least of the problems. ++ * ++ * Also the access to the ring buffer is always safe. 
++ */ ++ rb_next_seq = prb_next_seq(rb); ++ seq = rb_next_seq - ((u32)rb_next_seq - ulseq); ++ ++ return seq; ++} ++ ++#endif /* CONFIG_64BIT */ ++ + #endif /* _KERNEL_PRINTK_RINGBUFFER_H */ +-- +2.51.0 + +From c6c31fd92dce172e917646ca1aca4ca0f90ee08d Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 7 Dec 2023 14:15:15 +0000 +Subject: [PATCH 111/213] printk: Adjust mapping for 32bit seq macros + +Note: This change only applies to 32bit architectures. On 64bit + architectures the macros are NOPs. + +__ulseq_to_u64seq() computes the upper 32 bits of the passed +argument value (@ulseq). The upper bits are derived from a base +value (@rb_next_seq) in a way that assumes @ulseq represents a +64bit number that is less than or equal to @rb_next_seq. + +Until now this mapping has been correct for all call sites. However, +in a follow-up commit, values of @ulseq will be passed in that are +higher than the base value. This requires a change to how the 32bit +value is mapped to a 64bit sequence number. + +Rather than mapping @ulseq such that the base value is the end of a +32bit block, map @ulseq such that the base value is in the middle of +a 32bit block. This allows supporting 31 bits before and after the +base value, which is deemed acceptable for the console sequence +number during runtime. + +Here is an example to illustrate the previous and new mappings. + +For a base value (@rb_next_seq) of 2 2000 0000... + +Before this change the range of possible return values was: + +1 2000 0001 to 2 2000 0000 + +__ulseq_to_u64seq(1fff ffff) => 2 1fff ffff +__ulseq_to_u64seq(2000 0000) => 2 2000 0000 +__ulseq_to_u64seq(2000 0001) => 1 2000 0001 +__ulseq_to_u64seq(9fff ffff) => 1 9fff ffff +__ulseq_to_u64seq(a000 0000) => 1 a000 0000 +__ulseq_to_u64seq(a000 0001) => 1 a000 0001 + +After this change the range of possible return values are: +1 a000 0001 to 2 a000 0000 + +__ulseq_to_u64seq(1fff ffff) => 2 1fff ffff +__ulseq_to_u64seq(2000 0000) => 2 2000 0000 +__ulseq_to_u64seq(2000 0001) => 2 2000 0001 +__ulseq_to_u64seq(9fff ffff) => 2 9fff ffff +__ulseq_to_u64seq(a000 0000) => 2 a000 0000 +__ulseq_to_u64seq(a000 0001) => 1 a000 0001 + +[ john.ogness: Rewrite commit message. ] + +Reported-by: Francesco Dolcini +Reported-by: kernel test robot +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index b82a96dc2ea2..12f60c782e46 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -407,7 +407,7 @@ static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) + * Also the access to the ring buffer is always safe. 
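The before/after mapping tables in the message above can be reproduced with a short stand-alone C program. Both helpers below are simplified stand-ins for __ulseq_to_u64seq(), and base is the example value 2 2000 0000 from the commit message:

	#include <stdio.h>
	#include <stdint.h>

	static const uint64_t base = 0x220000000ULL;	/* stand-in for rb_next_seq */

	static uint64_t map_u32(uint32_t ulseq)	/* old: ulseq assumed <= base */
	{
		return base - (uint32_t)(base - ulseq);
	}

	static uint64_t map_s32(uint32_t ulseq)	/* new: ~2^31 either side of base */
	{
		return base - (int32_t)(uint32_t)(base - ulseq);
	}

	int main(void)
	{
		/* 0x20000001 is just above the base's low 32 bits (0x20000000). */
		printf("u32: %#llx\n", (unsigned long long)map_u32(0x20000001));
		printf("s32: %#llx\n", (unsigned long long)map_s32(0x20000001));
		return 0;
	}

With the u32 difference, 0x20000001 folds to 1 2000 0001 (below the base); with the s32 difference it folds to 2 2000 0001, matching the new table.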
*/ - console_flush_on_panic(CONSOLE_FLUSH_PENDING); -+ nbcon_atomic_flush_unsafe(); + rb_next_seq = prb_next_seq(rb); +- seq = rb_next_seq - ((u32)rb_next_seq - ulseq); ++ seq = rb_next_seq - (s32)((u32)rb_next_seq - ulseq); - local_irq_enable(); - for (i = 0; ; i += PANIC_TIMER_STEP) { -@@ -627,6 +630,7 @@ bool oops_may_print(void) - */ - void oops_enter(void) - { -+ nbcon_cpu_emergency_enter(); - tracing_off(); - /* can't trust the integrity of the kernel anymore: */ - debug_locks_off(); -@@ -649,6 +653,7 @@ void oops_exit(void) - { - do_oops_enter_exit(); - print_oops_end_marker(); -+ nbcon_cpu_emergency_exit(); - kmsg_dump(KMSG_DUMP_OOPS); + return seq; + } +-- +2.51.0 + +From fcbf226d760af9e34121e21f70f0a0ad478e2387 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 22 Nov 2023 16:13:37 +0000 +Subject: [PATCH 112/213] printk: Use prb_first_seq() as base for 32bit seq + macros + +Note: This change only applies to 32bit architectures. On 64bit + architectures the macros are NOPs. + +Currently prb_next_seq() is used as the base for the 32bit seq +macros __u64seq_to_ulseq() and __ulseq_to_u64seq(). However, in +a follow-up commit, prb_next_seq() will need to make use of the +32bit seq macros. + +Use prb_first_seq() as the base for the 32bit seq macros instead +because it is guaranteed to return 64bit sequence numbers without +relying on any 32bit seq macros. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 2 +- + kernel/printk/printk_ringbuffer.h | 8 ++++---- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index fde338606ce8..49a82ccce8e9 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -1832,7 +1832,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq, } -@@ -660,6 +665,8 @@ struct warn_args { - void __warn(const char *file, int line, void *caller, unsigned taint, - struct pt_regs *regs, struct warn_args *args) + /* Get the sequence number of the tail descriptor. */ +-static u64 prb_first_seq(struct printk_ringbuffer *rb) ++u64 prb_first_seq(struct printk_ringbuffer *rb) { -+ nbcon_cpu_emergency_enter(); -+ - disable_trace_on_warning(); - - if (file) -@@ -690,6 +697,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + struct prb_desc_ring *desc_ring = &rb->desc_ring; + enum desc_state d_state; +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index 12f60c782e46..ee294aaf4aeb 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -378,6 +378,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, + bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, + struct printk_info *info, unsigned int *line_count); - /* Just a warning, don't kill lockdep. 
*/ - add_taint(taint, LOCKDEP_STILL_OK); -+ -+ nbcon_cpu_emergency_exit(); - } ++u64 prb_first_seq(struct printk_ringbuffer *rb); + u64 prb_first_valid_seq(struct printk_ringbuffer *rb); + u64 prb_next_seq(struct printk_ringbuffer *rb); - #ifdef CONFIG_BUG -diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile -index f5b388e81..39a2b61c7 100644 ---- a/kernel/printk/Makefile -+++ b/kernel/printk/Makefile -@@ -1,6 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0-only - obj-y = printk.o --obj-$(CONFIG_PRINTK) += printk_safe.o -+obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o - obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o - obj-$(CONFIG_PRINTK_INDEX) += index.o +@@ -392,12 +393,12 @@ u64 prb_next_seq(struct printk_ringbuffer *rb); -diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h -index 7d4979d5c..7db6992c5 100644 ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -3,6 +3,8 @@ - * internal.h - printk internal definitions - */ - #include -+#include -+#include "printk_ringbuffer.h" + static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) + { ++ u64 rb_first_seq = prb_first_seq(rb); + u64 seq; +- u64 rb_next_seq; - #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) - void __init printk_sysctl_init(void); -@@ -12,6 +14,12 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, - #define printk_sysctl_init() do { } while (0) - #endif + /* + * The provided sequence is only the lower 32 bits of the ringbuffer +- * sequence. It needs to be expanded to 64bit. Get the next sequence ++ * sequence. It needs to be expanded to 64bit. Get the first sequence + * number from the ringbuffer and fold it. + * + * Having a 32bit representation in the console is sufficient. +@@ -406,8 +407,7 @@ static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) + * + * Also the access to the ring buffer is always safe. + */ +- rb_next_seq = prb_next_seq(rb); +- seq = rb_next_seq - (s32)((u32)rb_next_seq - ulseq); ++ seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); -+#define con_printk(lvl, con, fmt, ...) \ -+ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ -+ (con->flags & CON_NBCON) ? "" : "legacy ", \ -+ (con->flags & CON_BOOT) ? "boot" : "", \ -+ con->name, con->index, ##__VA_ARGS__) -+ - #ifdef CONFIG_PRINTK + return seq; + } +-- +2.51.0 + +From f5f5dd5048687a5c59f7e4f22e3bfe1cf3219327 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Thu, 19 Oct 2023 10:32:05 +0000 +Subject: [PATCH 113/213] printk: ringbuffer: Do not skip non-finalized records + with prb_next_seq() + +Commit f244b4dc53e5 ("printk: ringbuffer: Improve +prb_next_seq() performance") introduced an optimization for +prb_next_seq() by using best-effort to track recently finalized +records. However, the order of finalization does not +necessarily match the order of the records. The optimization +changed prb_next_seq() to return inconsistent results, possibly +yielding sequence numbers that are not available to readers +because they are preceded by non-finalized records or they are +not yet visible to the reader CPU. + +Rather than simply best-effort tracking recently finalized +records, force the committing writer to read records and +increment the last "contiguous block" of finalized records. In +order to do this, the sequence number instead of ID must be +stored because ID's cannot be directly compared. 
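The "contiguous block" rule can be sketched in isolation. In the toy below (stand-alone C; an array of flags stands in for the descriptor states, and a CAS loop stands in for the atomic_long_try_cmpxchg_release() used by the real desc_update_last_finalized()), a committing writer advances the published value only across records that are already finalized, so it never skips over a hole:

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdatomic.h>

	#define NRECS 8

	static bool finalized[NRECS];			/* stand-in for descriptor states */
	static _Atomic unsigned long last_finalized;	/* highest seq known finalized */

	/* Advance last_finalized over the contiguous finalized prefix. */
	static void update_last_finalized(void)
	{
		unsigned long old_seq, new_seq;

		do {
			old_seq = atomic_load(&last_finalized);
			new_seq = old_seq;

			/* Walk forward only while the next record is finalized. */
			while (new_seq + 1 < NRECS && finalized[new_seq + 1])
				new_seq++;

			if (new_seq == old_seq)
				return;	/* nothing new; a hole follows old_seq */

			/* Retry if another writer advanced it meanwhile. */
		} while (!atomic_compare_exchange_weak(&last_finalized,
						       &old_seq, new_seq));
	}

	int main(void)
	{
		/* Record 0 is assumed finalized (matches the initial value 0). */
		finalized[0] = finalized[1] = true;
		finalized[3] = true;	/* record 2 not finalized: a hole */

		update_last_finalized();
		printf("last finalized: %lu\n", atomic_load(&last_finalized));
		return 0;
	}

Running it reports 1, not 3: record 3 is finalized but stays unpublished until record 2 is.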
+ +A new memory barrier pair is introduced to guarantee that a +reader can always read the records up until the sequence number +returned by prb_next_seq() (unless the records have since +been overwritten in the ringbuffer). + +This restores the original functionality of prb_next_seq() +while also keeping the optimization. + +For 32bit systems, only the lower 32 bits of the sequence +number are stored. When reading the value, it is expanded to +the full 64bit sequence number using the 32bit seq macros, +which fold in the value returned by prb_first_seq(). + +Fixes: f244b4dc53e5 ("printk: ringbuffer: Improve prb_next_seq() performance") +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 164 +++++++++++++++++++++++------- + kernel/printk/printk_ringbuffer.h | 4 +- + 2 files changed, 127 insertions(+), 41 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index 49a82ccce8e9..04c26cca546f 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -6,6 +6,7 @@ + #include + #include + #include "printk_ringbuffer.h" ++#include "internal.h" - #ifdef CONFIG_PRINTK_CALLER -@@ -35,6 +43,19 @@ enum printk_info_flags { - LOG_CONT = 8, /* text is a fragment of a continuation line */ - }; + /** + * DOC: printk_ringbuffer overview +@@ -303,6 +304,9 @@ + * + * desc_push_tail:B / desc_reserve:D + * set descriptor reusable (state), then push descriptor tail (id) ++ * ++ * desc_update_last_finalized:A / desc_last_finalized_seq:A ++ * store finalized record, then set new highest finalized sequence number + */ + + #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) +@@ -1441,20 +1445,118 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer + return false; + } -+extern struct printk_ringbuffer *prb; -+extern bool printk_threads_enabled; -+extern bool have_legacy_console; -+extern bool have_boot_console; -+ +/* -+ * Specifies if the console lock/unlock dance is needed for console -+ * printing. If @have_boot_console is true, the nbcon consoles will -+ * be printed serially along with the legacy consoles because nbcon -+ * consoles cannot print simultaneously with boot consoles. ++ * @last_finalized_seq value guarantees that all records up to and including ++ * this sequence number are finalized and can be read. The only exception are ++ * too old records which have already been overwritten. ++ * ++ * It is also guaranteed that @last_finalized_seq only increases. ++ * ++ * Be aware that finalized records following non-finalized records are not ++ * reported because they are not yet available to the reader. For example, ++ * a new record stored via printk() will not be available to a printer if ++ * it follows a record that has not been finalized yet. However, once that ++ * non-finalized record becomes finalized, @last_finalized_seq will be ++ * appropriately updated and the full set of finalized records will be ++ * available to the printer. And since each printk() caller will either ++ * directly print or trigger deferred printing of all available unprinted ++ * records, all printk() messages will get printed. 
+ */ -+#define printing_via_unlock (have_legacy_console || have_boot_console) ++static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) ++{ ++ struct prb_desc_ring *desc_ring = &rb->desc_ring; ++ unsigned long ulseq; + - __printf(4, 0) - int vprintk_store(int facility, int level, - const struct dev_printk_info *dev_info, -@@ -61,12 +82,90 @@ void defer_console_output(void); - - u16 printk_parse_prefix(const char *text, int *level, - enum printk_info_flags *flags); -+void console_lock_spinning_enable(void); -+int console_lock_spinning_disable_and_check(int cookie); ++ /* ++ * Guarantee the sequence number is loaded before loading the ++ * associated record in order to guarantee that the record can be ++ * seen by this CPU. This pairs with desc_update_last_finalized:A. ++ */ ++ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq ++ ); /* LMM(desc_last_finalized_seq:A) */ + -+u64 nbcon_seq_read(struct console *con); -+void nbcon_seq_force(struct console *con, u64 seq); -+bool nbcon_alloc(struct console *con); -+void nbcon_init(struct console *con); -+void nbcon_free(struct console *con); -+enum nbcon_prio nbcon_get_default_prio(void); -+void nbcon_atomic_flush_all(void); -+bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); -+void nbcon_kthread_create(struct console *con); -+void nbcon_wake_threads(void); -+void nbcon_legacy_kthread_create(void); ++ return __ulseq_to_u64seq(rb, ulseq); ++} ++ ++static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, ++ struct printk_record *r, unsigned int *line_count); + +/* -+ * Check if the given console is currently capable and allowed to print -+ * records. Note that this function does not consider the current context, -+ * which can also play a role in deciding if @con can be used to print -+ * records. ++ * Check if there are records directly following @last_finalized_seq that are ++ * finalized. If so, update @last_finalized_seq to the latest of these ++ * records. It is not allowed to skip over records that are not yet finalized. + */ -+static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) ++static void desc_update_last_finalized(struct printk_ringbuffer *rb) +{ -+ if (!(flags & CON_ENABLED)) -+ return false; ++ struct prb_desc_ring *desc_ring = &rb->desc_ring; ++ u64 old_seq = desc_last_finalized_seq(rb); ++ unsigned long oldval; ++ unsigned long newval; ++ u64 finalized_seq; ++ u64 try_seq; + -+ if ((flags & CON_SUSPENDED)) -+ return false; ++try_again: ++ finalized_seq = old_seq; ++ try_seq = finalized_seq + 1; + -+ if (flags & CON_NBCON) { -+ if (use_atomic) { -+ if (!con->write_atomic) -+ return false; -+ } else { -+ if (!con->write_thread || !con->kthread) -+ return false; -+ } -+ } else { -+ if (!con->write) -+ return false; ++ /* Try to find later finalized records. */ ++ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { ++ finalized_seq = try_seq; ++ try_seq++; + } + ++ /* No update needed if no later finalized record was found. */ ++ if (finalized_seq == old_seq) ++ return; ++ ++ oldval = __u64seq_to_ulseq(old_seq); ++ newval = __u64seq_to_ulseq(finalized_seq); ++ + /* -+ * Console drivers may assume that per-cpu resources have been -+ * allocated. So unless they're explicitly marked as being able to -+ * cope (CON_ANYTIME) don't call them until this CPU is officially up. ++ * Set the sequence number of a later finalized record that has been ++ * seen. 
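The acquire in desc_last_finalized_seq() above pairs with the release that publishes the new value below. Stripped of the ringbuffer specifics, the pattern is plain release/acquire publication. A stand-alone C11 sketch, where record_data and latest_seq are hypothetical stand-ins for the finalized record and last_finalized_seq:

	#include <stdio.h>
	#include <stdatomic.h>

	static int record_data;				/* the payload being published */
	static _Atomic unsigned long latest_seq;	/* stand-in for last_finalized_seq */

	static void writer(void)
	{
		record_data = 42;	/* finalize the record... */
		/* ...then publish its seq; pairs with the acquire below. */
		atomic_store_explicit(&latest_seq, 1, memory_order_release);
	}

	static int reader(void)
	{
		/* Any reader that observes seq == 1 here... */
		if (atomic_load_explicit(&latest_seq, memory_order_acquire) == 1)
			return record_data;	/* ...is guaranteed to see 42. */
		return -1;
	}

	int main(void)
	{
		writer();
		printf("%d\n", reader());	/* 42 */
		return 0;
	}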
++ * ++ * Guarantee the record data is visible to other CPUs before storing ++ * its sequence number. This pairs with desc_last_finalized_seq:A. ++ * ++ * Memory barrier involvement: ++ * ++ * If desc_last_finalized_seq:A reads from ++ * desc_update_last_finalized:A, then desc_read:A reads from ++ * _prb_commit:B. ++ * ++ * Relies on: ++ * ++ * RELEASE from _prb_commit:B to desc_update_last_finalized:A ++ * matching ++ * ACQUIRE from desc_last_finalized_seq:A to desc_read:A ++ * ++ * Note: _prb_commit:B and desc_update_last_finalized:A can be ++ * different CPUs. However, the desc_update_last_finalized:A ++ * CPU (which performs the release) must have previously seen ++ * _prb_commit:B. + */ -+ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) -+ return false; -+ -+ return true; ++ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, ++ &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ ++ old_seq = __ulseq_to_u64seq(rb, oldval); ++ goto try_again; ++ } +} + -+/** -+ * nbcon_kthread_wake - Wake up a printk thread -+ * @con: Console to operate on -+ */ -+static inline void nbcon_kthread_wake(struct console *con) -+{ + /* + * Attempt to finalize a specified descriptor. If this fails, the descriptor + * is either already final or it will finalize itself when the writer commits. + */ +-static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) ++static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) + { ++ struct prb_desc_ring *desc_ring = &rb->desc_ring; + unsigned long prev_state_val = DESC_SV(id, desc_committed); + struct prb_desc *d = to_desc(desc_ring, id); + +- atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, +- DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ +- +- /* Best effort to remember the last finalized @id. */ +- atomic_long_set(&desc_ring->last_finalized_id, id); ++ if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, ++ DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ ++ desc_update_last_finalized(rb); ++ } + } + + /** +@@ -1550,7 +1652,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, + * readers. (For seq==0 there is no previous descriptor.) + */ + if (info->seq > 0) +- desc_make_final(desc_ring, DESC_ID(id - 1)); ++ desc_make_final(rb, DESC_ID(id - 1)); + + r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); + /* If text data allocation fails, a data-less record is committed. */ +@@ -1643,7 +1745,7 @@ void prb_commit(struct prb_reserved_entry *e) + */ + head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ + if (head_id != e->id) +- desc_make_final(desc_ring, e->id); ++ desc_make_final(e->rb, e->id); + } + + /** +@@ -1663,12 +1765,9 @@ void prb_commit(struct prb_reserved_entry *e) + */ + void prb_final_commit(struct prb_reserved_entry *e) + { +- struct prb_desc_ring *desc_ring = &e->rb->desc_ring; +- + _prb_commit(e, desc_finalized); + +- /* Best effort to remember the last finalized @id. */ +- atomic_long_set(&desc_ring->last_finalized_id, e->id); ++ desc_update_last_finalized(e->rb); + } + + /* +@@ -2008,7 +2107,9 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) + * newest sequence number available to readers will be. + * + * This provides readers a sequence number to jump to if all currently +- * available records should be skipped. ++ * available records should be skipped. 
It is guaranteed that all records ++ * previous to the returned value have been finalized and are (or were) ++ * available to the reader. + * + * Context: Any context. + * Return: The sequence number of the next newest (not yet available) record +@@ -2016,34 +2117,19 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) + */ + u64 prb_next_seq(struct printk_ringbuffer *rb) + { +- struct prb_desc_ring *desc_ring = &rb->desc_ring; +- enum desc_state d_state; +- unsigned long id; + u64 seq; + +- /* Check if the cached @id still points to a valid @seq. */ +- id = atomic_long_read(&desc_ring->last_finalized_id); +- d_state = desc_read(desc_ring, id, NULL, &seq, NULL); ++ seq = desc_last_finalized_seq(rb); + +- if (d_state == desc_finalized || d_state == desc_reusable) { +- /* +- * Begin searching after the last finalized record. +- * +- * On 0, the search must begin at 0 because of hack#2 +- * of the bootstrapping phase it is not known if a +- * record at index 0 exists. +- */ +- if (seq != 0) +- seq++; +- } else { +- /* +- * The information about the last finalized sequence number +- * has gone. It should happen only when there is a flood of +- * new messages and the ringbuffer is rapidly recycled. +- * Give up and start from the beginning. +- */ +- seq = 0; +- } + /* -+ * Guarantee any new records can be seen by tasks preparing to wait -+ * before this context checks if the rcuwait is empty. -+ * -+ * The full memory barrier in rcuwait_wake_up() pairs with the full -+ * memory barrier within set_current_state() of -+ * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() -+ * adds the waiter but before it has checked the wait condition. ++ * Begin searching after the last finalized record. + * -+ * This pairs with nbcon_kthread_func:A. ++ * On 0, the search must begin at 0 because of hack#2 ++ * of the bootstrapping phase it is not known if a ++ * record at index 0 exists. + */ -+ rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ -+} -+ - #else ++ if (seq != 0) ++ seq++; - #define PRINTK_PREFIX_MAX 0 - #define PRINTK_MESSAGE_MAX 0 - #define PRINTKRB_RECORD_MAX 0 + /* + * The information about the last finalized @seq might be inaccurate. 
+@@ -2085,7 +2171,7 @@ void prb_init(struct printk_ringbuffer *rb, + rb->desc_ring.infos = infos; + atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); + atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); +- atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); ++ atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); + + rb->text_data_ring.size_bits = textbits; + rb->text_data_ring.data = text_buf; +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index ee294aaf4aeb..2d948cc82b5b 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -75,7 +75,7 @@ struct prb_desc_ring { + struct printk_info *infos; + atomic_long_t head_id; + atomic_long_t tail_id; +- atomic_long_t last_finalized_id; ++ atomic_long_t last_finalized_seq; + }; -+static inline void nbcon_kthread_wake(struct console *con) { } -+static inline void nbcon_kthread_create(struct console *con) { } -+#define printk_threads_enabled (false) -+#define printing_via_unlock (false) -+ /* - * In !PRINTK builds we still export console_sem - * semaphore and some of console functions (console_unlock()/etc.), so -@@ -76,8 +175,23 @@ u16 printk_parse_prefix(const char *text, int *level, - #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) +@@ -259,7 +259,7 @@ static struct printk_ringbuffer name = { \ + .infos = &_##name##_infos[0], \ + .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ + .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ +- .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \ ++ .last_finalized_seq = ATOMIC_INIT(0), \ + }, \ + .text_data_ring = { \ + .size_bits = (avgtextbits) + (descbits), \ +-- +2.51.0 + +From e62f200afba5a6b9c72ae5cf6c06a916f3101d87 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 23 Oct 2023 11:11:05 +0000 +Subject: [PATCH 114/213] printk: ringbuffer: Clarify special lpos values + +For empty line records, no data blocks are created. Instead, +these valid records are identified by special logical position +values (in fields of @prb_desc.text_blk_lpos). + +Currently the macro NO_LPOS is used for empty line records. +This name is confusing because it does not imply _why_ there is +no data block. + +Rename NO_LPOS to EMPTY_LINE_LPOS so that it is clear why there +is no data block. + +Also add comments explaining the use of EMPTY_LINE_LPOS as well +as clarification to the values used to represent data-less +blocks. 
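The encoding is: bit0 set means "no data block", and bit1 then distinguishes why. A stand-alone sketch of the resulting reader-side checks (the LPOS_DATALESS() test mirrors the macro of that name in printk_ringbuffer.h; the two constants are the ones touched by this patch):

	#include <stdio.h>

	#define FAILED_LPOS	0x1	/* bit0: no data block; bit1 clear: failure */
	#define EMPTY_LINE_LPOS	0x3	/* bit0: no data block; bit1 set: empty line */

	/* Real data block positions are aligned, so bit0 is never set for them. */
	#define LPOS_DATALESS(lpos)	((lpos) & 1UL)

	int main(void)
	{
		unsigned long lpos[] = { 0x0, FAILED_LPOS, EMPTY_LINE_LPOS, 0x40 };

		for (int i = 0; i < 4; i++) {
			if (!LPOS_DATALESS(lpos[i]))
				printf("%#lx: real data block\n", lpos[i]);
			else if (lpos[i] == EMPTY_LINE_LPOS)
				printf("%#lx: valid record, empty line\n", lpos[i]);
			else
				printf("%#lx: data lost/unavailable\n", lpos[i]);
		}
		return 0;
	}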
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 20 ++++++++++++++++---- + kernel/printk/printk_ringbuffer.h | 16 +++++++++++++++- + 2 files changed, 31 insertions(+), 5 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index 04c26cca546f..244d991ffd73 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -1034,9 +1034,13 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, + unsigned long next_lpos; - static inline bool printk_percpu_data_ready(void) { return false; } -+static inline u64 nbcon_seq_read(struct console *con) { return 0; } -+static inline void nbcon_seq_force(struct console *con, u64 seq) { } -+static inline bool nbcon_alloc(struct console *con) { return false; } -+static inline void nbcon_init(struct console *con) { } -+static inline void nbcon_free(struct console *con) { } -+static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } -+static inline void nbcon_atomic_flush_all(void) { } -+static inline bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, -+ int cookie) { return false; } -+ -+static inline bool console_is_usable(struct console *con, short flags, -+ bool use_atomic) { return false; } -+ - #endif /* CONFIG_PRINTK */ + if (size == 0) { +- /* Specify a data-less block. */ +- blk_lpos->begin = NO_LPOS; +- blk_lpos->next = NO_LPOS; ++ /* ++ * Data blocks are not created for empty lines. Instead, the ++ * reader will recognize these special lpos values and handle ++ * it appropriately. ++ */ ++ blk_lpos->begin = EMPTY_LINE_LPOS; ++ blk_lpos->next = EMPTY_LINE_LPOS; + return NULL; + } -+extern struct printk_buffers printk_shared_pbufs; -+ - /** - * struct printk_buffers - Buffers to read/format/output printk messages. - * @outbuf: After formatting, contains text to output. -@@ -105,3 +219,10 @@ struct printk_message { - }; +@@ -1214,10 +1218,18 @@ static const char *get_data(struct prb_data_ring *data_ring, - bool other_cpu_in_panic(void); -+bool this_cpu_in_panic(void); -+bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -+ bool is_extended, bool may_supress); -+ -+#ifdef CONFIG_PRINTK -+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); -+#endif -diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c -new file mode 100644 -index 000000000..b4278854e ---- /dev/null -+++ b/kernel/printk/nbcon.c -@@ -0,0 +1,1659 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+// Copyright (C) 2022 Linutronix GmbH, John Ogness -+// Copyright (C) 2022 Intel, Thomas Gleixner -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "printk_ringbuffer.h" -+#include "internal.h" -+/* -+ * Printk console printing implementation for consoles which does not depend -+ * on the legacy style console_lock mechanism. -+ * -+ * The state of the console is maintained in the "nbcon_state" atomic -+ * variable. -+ * -+ * The console is locked when: -+ * -+ * - The 'prio' field contains the priority of the context that owns the -+ * console. Only higher priority contexts are allowed to take over the -+ * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. -+ * -+ * - The 'cpu' field denotes on which CPU the console is locked. It is used -+ * to prevent busy waiting on the same CPU. 
Also it informs the lock owner -+ * that it has lost the lock in a more complex scenario when the lock was -+ * taken over by a higher priority context, released, and taken on another -+ * CPU with the same priority as the interrupted owner. -+ * -+ * The acquire mechanism uses a few more fields: -+ * -+ * - The 'req_prio' field is used by the handover approach to make the -+ * current owner aware that there is a context with a higher priority -+ * waiting for the friendly handover. -+ * -+ * - The 'unsafe' field allows to take over the console in a safe way in the -+ * middle of emitting a message. The field is set only when accessing some -+ * shared resources or when the console device is manipulated. It can be -+ * cleared, for example, after emitting one character when the console -+ * device is in a consistent state. -+ * -+ * - The 'unsafe_takeover' field is set when a hostile takeover took the -+ * console in an unsafe state. The console will stay in the unsafe state -+ * until re-initialized. -+ * -+ * The acquire mechanism uses three approaches: -+ * -+ * 1) Direct acquire when the console is not owned or is owned by a lower -+ * priority context and is in a safe state. -+ * -+ * 2) Friendly handover mechanism uses a request/grant handshake. It is used -+ * when the current owner has lower priority and the console is in an -+ * unsafe state. -+ * -+ * The requesting context: -+ * -+ * a) Sets its priority into the 'req_prio' field. -+ * -+ * b) Waits (with a timeout) for the owning context to unlock the -+ * console. -+ * -+ * c) Takes the lock and clears the 'req_prio' field. -+ * -+ * The owning context: -+ * -+ * a) Observes the 'req_prio' field set on exit from the unsafe -+ * console state. -+ * -+ * b) Gives up console ownership by clearing the 'prio' field. -+ * -+ * 3) Unsafe hostile takeover allows to take over the lock even when the -+ * console is an unsafe state. It is used only in panic() by the final -+ * attempt to flush consoles in a try and hope mode. -+ * -+ * Note that separate record buffers are used in panic(). As a result, -+ * the messages can be read and formatted without any risk even after -+ * using the hostile takeover in unsafe state. -+ * -+ * The release function simply clears the 'prio' field. -+ * -+ * All operations on @console::nbcon_state are atomic cmpxchg based to -+ * handle concurrency. -+ * -+ * The acquire/release functions implement only minimal policies: -+ * -+ * - Preference for higher priority contexts. -+ * - Protection of the panic CPU. -+ * -+ * All other policy decisions must be made at the call sites: -+ * -+ * - What is marked as an unsafe section. -+ * - Whether to spin-wait if there is already an owner and the console is -+ * in an unsafe state. -+ * - Whether to attempt an unsafe hostile takeover. -+ * -+ * The design allows to implement the well known: -+ * -+ * acquire() -+ * output_one_printk_record() -+ * release() -+ * -+ * The output of one printk record might be interrupted with a higher priority -+ * context. The new owner is supposed to reprint the entire interrupted record -+ * from scratch. -+ */ -+ -+/** -+ * nbcon_state_set - Helper function to set the console state -+ * @con: Console to update -+ * @new: The new state to write -+ * -+ * Only to be used when the console is not yet or no longer visible in the -+ * system. Otherwise use nbcon_state_try_cmpxchg(). 
-+ */ -+static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) -+{ -+ atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); -+} + /* Data-less data block description. */ + if (BLK_DATALESS(blk_lpos)) { +- if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { ++ /* ++ * Records that are just empty lines are also valid, even ++ * though they do not have a data block. For such records ++ * explicitly return empty string data to signify success. ++ */ ++ if (blk_lpos->begin == EMPTY_LINE_LPOS && ++ blk_lpos->next == EMPTY_LINE_LPOS) { + *data_size = 0; + return ""; + } + -+/** -+ * nbcon_state_read - Helper function to read the console state -+ * @con: Console to read -+ * @state: The state to store the result ++ /* Data lost, invalid, or otherwise unavailable. */ + return NULL; + } + +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index 2d948cc82b5b..d49460f7578e 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -127,8 +127,22 @@ enum desc_state { + #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) + #define DESC_ID_MASK (~DESC_FLAGS_MASK) + #define DESC_ID(sv) ((sv) & DESC_ID_MASK) ++ ++/* ++ * Special data block logical position values (for fields of ++ * @prb_desc.text_blk_lpos). ++ * ++ * - Bit0 is used to identify if the record has no data block. (Implemented in ++ * the LPOS_DATALESS() macro.) ++ * ++ * - Bit1 specifies the reason for not having a data block. ++ * ++ * These special values could never be real lpos values because of the ++ * meta data and alignment padding of data blocks. (See to_blk_size() for ++ * details.) + */ -+static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) + #define FAILED_LPOS 0x1 +-#define NO_LPOS 0x3 ++#define EMPTY_LINE_LPOS 0x3 + + #define FAILED_BLK_LPOS \ + { \ +-- +2.51.0 + +From 7dc9f52abae2963190383dd8342e9ce51f804ccf Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 13 Oct 2023 14:30:49 +0000 +Subject: [PATCH 115/213] printk: Add this_cpu_in_panic() + +There is already panic_in_progress() and other_cpu_in_panic(), +but checking if the current CPU is the panic CPU must still be +open coded. + +Add this_cpu_in_panic() to complete the set. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 1 + + kernel/printk/printk.c | 43 +++++++++++++++++++++------------------- + 2 files changed, 24 insertions(+), 20 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 6c2afee5ef62..ac2d9750e5f8 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -130,6 +130,7 @@ struct printk_message { + }; + + bool other_cpu_in_panic(void); ++bool this_cpu_in_panic(void); + bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_supress); + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 1a3f8dd56fe0..9a4fe982d9b0 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -347,6 +347,29 @@ static bool panic_in_progress(void) + return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); + } + ++/* Return true if a panic is in progress on the current CPU. 
*/ ++bool this_cpu_in_panic(void) +{ -+ state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); ++ /* ++ * We can use raw_smp_processor_id() here because it is impossible for ++ * the task to be migrated to the panic_cpu, or away from it. If ++ * panic_cpu has already been set, and we're not currently executing on ++ * that CPU, then we never will be. ++ */ ++ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); +} + -+/** -+ * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state -+ * @con: Console to update -+ * @cur: Old/expected state -+ * @new: New state ++/* ++ * Return true if a panic is in progress on a remote CPU. + * -+ * Return: True on success. False on fail and @cur is updated. ++ * On true, the local CPU should immediately release any printing resources ++ * that may be needed by the panic CPU. + */ -+static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, -+ struct nbcon_state *new) ++bool other_cpu_in_panic(void) +{ -+ return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); ++ return (panic_in_progress() && !this_cpu_in_panic()); +} + + /* + * This is used for debugging the mess that is the VT code by + * keeping track if we have the console semaphore held. It's +@@ -2628,26 +2651,6 @@ static int console_cpu_notify(unsigned int cpu) + return 0; + } + +-/* +- * Return true if a panic is in progress on a remote CPU. +- * +- * On true, the local CPU should immediately release any printing resources +- * that may be needed by the panic CPU. +- */ +-bool other_cpu_in_panic(void) +-{ +- if (!panic_in_progress()) +- return false; +- +- /* +- * We can use raw_smp_processor_id() here because it is impossible for +- * the task to be migrated to the panic_cpu, or away from it. If +- * panic_cpu has already been set, and we're not currently executing on +- * that CPU, then we never will be. +- */ +- return atomic_read(&panic_cpu) != raw_smp_processor_id(); +-} +- + /** + * console_lock - block the console subsystem from printing + * +-- +2.51.0 + +From ae31ffb2695d29346d95c4650ae90cf6c7bebfae Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 6 Nov 2023 15:01:58 +0000 +Subject: [PATCH 116/213] printk: ringbuffer: Cleanup reader terminology + +With the lockless ringbuffer, it is allowed that multiple +CPUs/contexts write simultaneously into the buffer. This creates +an ambiguity as some writers will finalize sooner. + +The documentation for the prb_read functions is not clear as it +refers to "not yet written" and "no data available". Clarify the +return values and language to be in terms of the reader: records +available for reading. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index 244d991ffd73..67ee1c62fcd6 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -1987,11 +1987,13 @@ u64 prb_first_seq(struct printk_ringbuffer *rb) + } + + /* +- * Non-blocking read of a record. Updates @seq to the last finalized record +- * (which may have no data available). ++ * Non-blocking read of a record. + * +- * See the description of prb_read_valid() and prb_read_valid_info() +- * for details. 
++ * On success @seq is updated to the record that was read and (if provided) ++ * @r and @line_count will contain the read/calculated data. ++ * ++ * On failure @seq is updated to a record that is not yet available to the ++ * reader, but it will be the next record available to the reader. + */ + static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + struct printk_record *r, unsigned int *line_count) +@@ -2010,7 +2012,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + *seq = tail_seq; + + } else if (err == -ENOENT) { +- /* Record exists, but no data available. Skip. */ ++ /* Record exists, but the data was lost. Skip. */ + (*seq)++; + + } else { +@@ -2043,7 +2045,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + * On success, the reader must check r->info.seq to see which record was + * actually read. This allows the reader to detect dropped records. + * +- * Failure means @seq refers to a not yet written record. ++ * Failure means @seq refers to a record not yet available to the reader. + */ + bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, + struct printk_record *r) +@@ -2073,7 +2075,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, + * On success, the reader must check info->seq to see which record meta data + * was actually read. This allows the reader to detect dropped records. + * +- * Failure means @seq refers to a not yet written record. ++ * Failure means @seq refers to a record not yet available to the reader. + */ + bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, + struct printk_info *info, unsigned int *line_count) +-- +2.51.0 + +From b11ea354ad18da4350180f0c4e0fe66e2778af2b Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 6 Nov 2023 14:59:55 +0000 +Subject: [PATCH 117/213] printk: Wait for all reserved records with pr_flush() + +Currently pr_flush() will only wait for records that were +available to readers at the time of the call (using +prb_next_seq()). But there may be more records (non-finalized) +that have following finalized records. pr_flush() should wait +for these to print as well. Particularly because any trailing +finalized records may be the messages that the calling context +wants to ensure are printed. + +Add a new ringbuffer function prb_next_reserve_seq() to return +the sequence number following the most recently reserved record. +This guarantees that pr_flush() will wait until all current +printk() messages (completed or in progress) have been printed. + +Fixes: 3b604ca81202 ("printk: add pr_flush()") +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 2 +- + kernel/printk/printk_ringbuffer.c | 113 ++++++++++++++++++++++++++++++ + kernel/printk/printk_ringbuffer.h | 1 + + 3 files changed, 115 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 9a4fe982d9b0..21e1590faa71 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3816,7 +3816,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + + might_sleep(); + +- seq = prb_next_seq(prb); ++ seq = prb_next_reserve_seq(prb); + + /* Flush the consoles so that records up to @seq are printed. 
*/ + console_lock(); +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index 67ee1c62fcd6..b7748d7c44c1 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -1986,6 +1986,119 @@ u64 prb_first_seq(struct printk_ringbuffer *rb) + return seq; + } + +/** -+ * nbcon_seq_read - Read the current console sequence -+ * @con: Console to read the sequence of ++ * prb_next_reserve_seq() - Get the sequence number after the most recently ++ * reserved record. + * -+ * Return: Sequence number of the next record to print on @con. ++ * @rb: The ringbuffer to get the sequence number from. ++ * ++ * This is the public function available to readers to see what sequence ++ * number will be assigned to the next reserved record. ++ * ++ * Note that depending on the situation, this value can be equal to or ++ * higher than the sequence number returned by prb_next_seq(). ++ * ++ * Context: Any context. ++ * Return: The sequence number that will be assigned to the next record ++ * reserved. + */ -+u64 nbcon_seq_read(struct console *con) ++u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) +{ -+ unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); ++ struct prb_desc_ring *desc_ring = &rb->desc_ring; ++ unsigned long last_finalized_id; ++ atomic_long_t *state_var; ++ u64 last_finalized_seq; ++ unsigned long head_id; ++ struct prb_desc desc; ++ unsigned long diff; ++ struct prb_desc *d; ++ int err; + -+ return __ulseq_to_u64seq(prb, nbcon_seq); ++ /* ++ * It may not be possible to read a sequence number for @head_id. ++ * So the ID of @last_finailzed_seq is used to calculate what the ++ * sequence number of @head_id will be. ++ */ ++ ++try_again: ++ last_finalized_seq = desc_last_finalized_seq(rb); ++ ++ /* ++ * @head_id is loaded after @last_finalized_seq to ensure that it is ++ * at or beyond @last_finalized_seq. ++ * ++ * Memory barrier involvement: ++ * ++ * If desc_last_finalized_seq:A reads from ++ * desc_update_last_finalized:A, then ++ * prb_next_reserve_seq:A reads from desc_reserve:D. ++ * ++ * Relies on: ++ * ++ * RELEASE from desc_reserve:D to desc_update_last_finalized:A ++ * matching ++ * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A ++ * ++ * Note: desc_reserve:D and desc_update_last_finalized:A can be ++ * different CPUs. However, the desc_update_last_finalized:A CPU ++ * (which performs the release) must have previously seen ++ * desc_read:C, which implies desc_reserve:D can be seen. ++ */ ++ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ ++ ++ d = to_desc(desc_ring, last_finalized_seq); ++ state_var = &d->state_var; ++ ++ /* Extract the ID, used to specify the descriptor to read. */ ++ last_finalized_id = DESC_ID(atomic_long_read(state_var)); ++ ++ /* Ensure @last_finalized_id is correct. */ ++ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); ++ ++ if (err == -EINVAL) { ++ if (last_finalized_seq == 0) { ++ /* ++ * @last_finalized_seq still contains its initial ++ * value. Probably no record has been finalized yet. ++ * This means the ringbuffer is not yet full and the ++ * @head_id value can be used directly (subtracting ++ * off the id value corresponding to seq=0). ++ */ ++ ++ /* ++ * Because of hack#2 of the bootstrapping phase, the ++ * @head_id initial value must be handled separately. 
++ */ ++ if (head_id == DESC0_ID(desc_ring->count_bits)) ++ return 0; ++ ++ /* ++ * The @head_id is initialized such that the first ++ * increment will yield the first record (seq=0). ++ * Therefore use the initial value +1 as the base to ++ * subtract from @head_id. ++ */ ++ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; ++ } else { ++ /* Record must have been overwritten. Try again. */ ++ goto try_again; ++ } ++ } ++ ++ /* ++ * @diff is the number of records beyond the last record available ++ * to readers. ++ */ ++ diff = head_id - last_finalized_id; ++ ++ /* ++ * @head_id points to the most recently reserved record, but this ++ * function returns the sequence number that will be assigned to the ++ * next (not yet reserved) record. Thus +1 is needed. ++ */ ++ return (last_finalized_seq + diff + 1); +} + -+/** -+ * nbcon_seq_force - Force console sequence to a specific value -+ * @con: Console to work on -+ * @seq: Sequence number value to set + /* + * Non-blocking read of a record. + * +diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h +index d49460f7578e..52626d0f1fa3 100644 +--- a/kernel/printk/printk_ringbuffer.h ++++ b/kernel/printk/printk_ringbuffer.h +@@ -395,6 +395,7 @@ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, + u64 prb_first_seq(struct printk_ringbuffer *rb); + u64 prb_first_valid_seq(struct printk_ringbuffer *rb); + u64 prb_next_seq(struct printk_ringbuffer *rb); ++u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); + + #ifdef CONFIG_64BIT + +-- +2.51.0 + +From d547cb429af469c0f59c323f09c8ec1341bce15a Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 13 Oct 2023 10:23:11 +0000 +Subject: [PATCH 118/213] printk: ringbuffer: Skip non-finalized records in + panic + +Normally a reader will stop once reaching a non-finalized +record. However, when a panic happens, writers from other CPUs +(or an interrupted context on the panic CPU) may have been +writing a record and were unable to finalize it. The panic CPU +will reserve/commit/finalize its panic records, but these will +be located after the non-finalized records. This results in +panic() not flushing the panic messages. + +Extend _prb_read_valid() to skip over non-finalized records if +on the panic CPU. + +Fixes: 896fbe20b4e2 ("printk: use the lockless ringbuffer") +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 28 ++++++++++++++++++++++++++-- + 1 file changed, 26 insertions(+), 2 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index b7748d7c44c1..d6ed33683b8b 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -2107,6 +2107,10 @@ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) + * + * On failure @seq is updated to a record that is not yet available to the + * reader, but it will be the next record available to the reader. + * -+ * Only to be used during init (before registration) or in extreme situations -+ * (such as panic with CONSOLE_REPLAY_ALL). -+ */ -+void nbcon_seq_force(struct console *con, u64 seq) -+{ ++ * Note: When the current CPU is in panic, this function will skip over any ++ * non-existent/non-finalized records in order to allow the panic CPU ++ * to print any and all records that have been finalized. 
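The ID-to-sequence translation in prb_next_reserve_seq() above reduces to arithmetic on two counters that advance in lockstep: each reserved record increments both the descriptor ID and the sequence number, so one known (seq, id) pair plus the current head ID is enough. A stand-alone sketch of just that step, with simplified names:

	#include <stdio.h>

	/* IDs and sequence numbers advance together, one per reserved record. */
	struct base {
		unsigned long id;	/* descriptor ID of a known-finalized record */
		unsigned long seq;	/* its sequence number */
	};

	static unsigned long next_reserve_seq(struct base b, unsigned long head_id)
	{
		unsigned long diff = head_id - b.id;	/* records beyond the base */

		/* head_id is the most recently reserved; the next one is +1. */
		return b.seq + diff + 1;
	}

	int main(void)
	{
		struct base b = { .id = 1000, .seq = 37 };

		/* Three records were reserved after the base record. */
		printf("%lu\n", next_reserve_seq(b, 1003));	/* prints 41 */
		return 0;
	}

Here head_id points at the most recently reserved record (sequence 40), so the next reservation gets 41.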
+ */ + static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + struct printk_record *r, unsigned int *line_count) +@@ -2129,8 +2133,28 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + (*seq)++; + + } else { +- /* Non-existent/non-finalized record. Must stop. */ +- return false; ++ /* ++ * Non-existent/non-finalized record. Must stop. ++ * ++ * For panic situations it cannot be expected that ++ * non-finalized records will become finalized. But ++ * there may be other finalized records beyond that ++ * need to be printed for a panic situation. If this ++ * is the panic CPU, skip this ++ * non-existent/non-finalized record unless it is ++ * at or beyond the head, in which case it is not ++ * possible to continue. ++ * ++ * Note that new messages printed on panic CPU are ++ * finalized when we are here. The only exception ++ * might be the last message without trailing newline. ++ * But it would have the sequence number returned ++ * by "prb_next_reserve_seq() - 1". ++ */ ++ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) ++ (*seq)++; ++ else ++ return false; + } + } + +-- +2.51.0 + +From 603743015e72900589dc784c5155271784485d1b Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 20 Nov 2023 12:46:35 +0100 +Subject: [PATCH 119/213] printk: ringbuffer: Consider committed as finalized + in panic + +A descriptor in the committed state means the record does not yet +exist for the reader. However, for the panic CPU, committed +records should be handled as finalized records since they contain +message data in a consistent state and may contain additional +hints as to the cause of the panic. + +Add an exception for records in the commit state to not be +considered non-existing when reading from the panic CPU. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk_ringbuffer.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c +index d6ed33683b8b..e7b808b829a0 100644 +--- a/kernel/printk/printk_ringbuffer.c ++++ b/kernel/printk/printk_ringbuffer.c +@@ -1857,6 +1857,8 @@ static bool copy_data(struct prb_data_ring *data_ring, + * descriptor. However, it also verifies that the record is finalized and has + * the sequence number @seq. On success, 0 is returned. + * ++ * For the panic CPU, committed descriptors are also considered finalized. ++ * + * Error return values: + * -EINVAL: A finalized record with sequence number @seq does not exist. + * -ENOENT: A finalized record with sequence number @seq exists, but its data +@@ -1875,16 +1877,25 @@ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, + + /* + * An unexpected @id (desc_miss) or @seq mismatch means the record +- * does not exist. A descriptor in the reserved or committed state +- * means the record does not yet exist for the reader. ++ * does not exist. A descriptor in the reserved state means the ++ * record does not yet exist for the reader. + */ + if (d_state == desc_miss || + d_state == desc_reserved || +- d_state == desc_committed || + s != seq) { + return -EINVAL; + } + + /* -+ * If the specified record no longer exists, the oldest available record -+ * is chosen. This is especially important on 32bit systems because only -+ * the lower 32 bits of the sequence number are stored. The upper 32 bits -+ * are derived from the sequence numbers available in the ringbuffer. 
++ * A descriptor in the committed state means the record does not yet ++ * exist for the reader. However, for the panic CPU, committed ++ * records are also handled as finalized records since they contain ++ * message data in a consistent state and may contain additional ++ * hints as to the cause of the panic. + */ -+ u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); ++ if (d_state == desc_committed && !this_cpu_in_panic()) ++ return -EINVAL; + -+ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); + /* + * A descriptor in the reusable state may no longer have its data + * available; report it as existing but with lost data. Or the record +-- +2.51.0 + +From 6cc8e765b389ad26a3a7317f642cb23f03344ba2 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 20 Oct 2023 09:37:05 +0000 +Subject: [PATCH 120/213] printk: Avoid non-panic CPUs writing to ringbuffer + +Commit 13fb0f74d702 ("printk: Avoid livelock with heavy printk +during panic") introduced a mechanism to silence non-panic CPUs +if too many messages are being dropped. Aside from trying to +workaround the livelock bugs of legacy consoles, it was also +intended to avoid losing panic messages. However, if non-panic +CPUs are writing to the ringbuffer, then reacting to dropped +messages is too late. + +To avoid losing panic CPU messages, silence non-panic CPUs +immediately on panic. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 26 ++++++-------------------- + 1 file changed, 6 insertions(+), 20 deletions(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 21e1590faa71..374aff25969c 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -462,12 +462,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + static DEFINE_MUTEX(syslog_lock); + + #ifdef CONFIG_PRINTK +-/* +- * During panic, heavy printk by other CPUs can delay the +- * panic and risk deadlock on console resources. +- */ +-static int __read_mostly suppress_panic_printk; +- + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ +@@ -2329,7 +2323,12 @@ asmlinkage int vprintk_emit(int facility, int level, + if (unlikely(suppress_printk)) + return 0; + +- if (unlikely(suppress_panic_printk) && other_cpu_in_panic()) ++ /* ++ * The messages on the panic CPU are the most important. If ++ * non-panic CPUs are generating any messages, they will be ++ * silently dropped. ++ */ ++ if (other_cpu_in_panic()) + return 0; + + if (level == LOGLEVEL_SCHED) { +@@ -2806,8 +2805,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) + bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_suppress) + { +- static int panic_console_dropped; +- + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); + const size_t outbuf_sz = sizeof(pbufs->outbuf); +@@ -2835,17 +2832,6 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + pmsg->seq = r.info->seq; + pmsg->dropped = r.info->seq - seq; + +- /* +- * Check for dropped messages in panic here so that printk +- * suppression can occur as early as possible if necessary. +- */ +- if (pmsg->dropped && +- panic_in_progress() && +- panic_console_dropped++ > 10) { +- suppress_panic_printk = 1; +- pr_warn_once("Too many dropped messages. 
Suppress messages on non-panic CPUs to prevent livelock.\n"); +- } +- + /* Skip record that has level above the console loglevel. */ + if (may_suppress && suppress_message_printing(r.info->level)) + goto out; +-- +2.51.0 + +From e9bdc5e1905f323934d6f83a5c83bce705d5ee2d Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 22 Nov 2023 11:23:43 +0000 +Subject: [PATCH 121/213] printk: Consider nbcon boot consoles on seq init + +If a non-boot console is registering and boot consoles exist, the +consoles are flushed before being unregistered. This allows the +non-boot console to continue where the boot console left off. + +If for whatever reason flushing fails, the lowest seq found from +any of the enabled boot consoles is used. Until now con->seq was +checked. However, if it is an nbcon boot console, the function +nbcon_seq_read() must be used to read seq because con->seq is +always 0. + +Check if it is an nbcon boot console and if so call +nbcon_seq_read() to read seq. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 374aff25969c..6b2adb30391a 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3423,11 +3423,20 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) + + newcon->seq = prb_next_seq(prb); + for_each_console(con) { +- if ((con->flags & CON_BOOT) && +- (con->flags & CON_ENABLED) && +- con->seq < newcon->seq) { +- newcon->seq = con->seq; ++ u64 seq; + -+ /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ -+ con->seq = 0; -+} ++ if (!((con->flags & CON_BOOT) && ++ (con->flags & CON_ENABLED))) { ++ continue; + } + -+/** -+ * nbcon_seq_try_update - Try to update the console sequence number -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * @new_seq: The new sequence number to set -+ * -+ * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to -+ * the 64bit value). This could be a different value than @new_seq if -+ * nbcon_seq_force() was used or the current context no longer owns the -+ * console. In the later case, it will stop printing anyway. -+ */ -+static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) ++ if (con->flags & CON_NBCON) ++ seq = nbcon_seq_read(con); ++ else ++ seq = con->seq; ++ ++ if (seq < newcon->seq) ++ newcon->seq = seq; + } + } + +-- +2.51.0 + +From 0da5fb8e5820e13e201b80bc5b4361904a31635a Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 9 Oct 2023 13:55:19 +0000 +Subject: [PATCH 122/213] printk: Add sparse notation to console_srcu locking + +kernel/printk/printk.c:284:5: sparse: sparse: context imbalance in +'console_srcu_read_lock' - wrong count at exit +include/linux/srcu.h:301:9: sparse: sparse: context imbalance in +'console_srcu_read_unlock' - unexpected unlock + +Reported-by: kernel test robot +Fixes: 6c4afa79147e ("printk: Prepare for SRCU console list protection") +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 6b2adb30391a..6c6e1f8060af 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -282,6 +282,7 @@ EXPORT_SYMBOL(console_list_unlock); + * Return: A cookie to pass to console_srcu_read_unlock(). 
+ */ + int console_srcu_read_lock(void) ++ __acquires(&console_srcu) + { + return srcu_read_lock_nmisafe(&console_srcu); + } +@@ -295,6 +296,7 @@ EXPORT_SYMBOL(console_srcu_read_lock); + * Counterpart to console_srcu_read_lock() + */ + void console_srcu_read_unlock(int cookie) ++ __releases(&console_srcu) + { + srcu_read_unlock_nmisafe(&console_srcu, cookie); + } +-- +2.51.0 + +From 004f54b0732a9601d7259f0362cfaecf76601e78 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 20 Oct 2023 09:52:59 +0000 +Subject: [PATCH 123/213] printk: nbcon: Ensure ownership release on failed + emit + +Until now it was assumed that ownership has been lost when the +write_atomic() callback fails. nbcon_emit_next_record() only +returns false when ownership has been lost. + +Ensure ownership has been lost before reporting failure by +explicitly attempting a release. If the current context is not +the owner, the release has no effect. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/nbcon.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index c8093bcc01fe..8ecd76aa22e6 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -852,7 +852,7 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + unsigned long con_dropped; + struct nbcon_state cur; + unsigned long dropped; +- bool done; ++ bool done = false; + + /* + * The printk buffers are filled within an unsafe section. This +@@ -891,17 +891,18 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; + +- if (con->write_atomic) { ++ if (con->write_atomic) + done = con->write_atomic(con, wctxt); +- } else { +- nbcon_context_release(ctxt); +- WARN_ON_ONCE(1); +- done = false; +- } + +- /* If not done, the emit was aborted. */ +- if (!done) ++ if (!done) { ++ /* ++ * The emit was aborted, probably due to a loss of ownership. ++ * Ensure ownership was lost or released before reporting the ++ * loss. ++ */ ++ nbcon_context_release(ctxt); + return false; ++ } + + /* + * Since any dropped message was successfully output, reset the +-- +2.51.0 + +From e4ba04079659c86283f98254e14b3d74c739c76d Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 22 Sep 2023 14:58:18 +0000 +Subject: [PATCH 124/213] printk: Check printk_deferred_enter()/_exit() usage + +Add validation that printk_deferred_enter()/_exit() are called in +non-migration contexts. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/printk.h | 7 +++++-- + kernel/printk/printk_safe.c | 12 ++++++++++++ + 2 files changed, 17 insertions(+), 2 deletions(-) + +diff --git a/include/linux/printk.h b/include/linux/printk.h +index e4878bb58f66..d2ffe2b00e00 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -159,13 +159,16 @@ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); + + extern void __printk_safe_enter(void); + extern void __printk_safe_exit(void); ++extern void __printk_deferred_enter(void); ++extern void __printk_deferred_exit(void); ++ + /* + * The printk_deferred_enter/exit macros are available only as a hack for + * some code paths that need to defer all printk console printing. Interrupts + * must be disabled for the deferred duration. 
+ */ +-#define printk_deferred_enter __printk_safe_enter +-#define printk_deferred_exit __printk_safe_exit ++#define printk_deferred_enter() __printk_deferred_enter() ++#define printk_deferred_exit() __printk_deferred_exit() + + /* + * Please don't use printk_ratelimit(), because it shares ratelimiting state +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 6d10927a07d8..8d9408d653de 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -26,6 +26,18 @@ void __printk_safe_exit(void) + this_cpu_dec(printk_context); + } + ++void __printk_deferred_enter(void) +{ -+ unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); -+ struct console *con = ctxt->console; -+ -+ if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, -+ __u64seq_to_ulseq(new_seq))) { -+ ctxt->seq = new_seq; -+ } else { -+ ctxt->seq = nbcon_seq_read(con); -+ } ++ cant_migrate(); ++ this_cpu_inc(printk_context); +} + -+bool printk_threads_enabled __ro_after_init; -+ -+/** -+ * nbcon_context_try_acquire_direct - Try to acquire directly -+ * @ctxt: The context of the caller -+ * @cur: The current console state -+ * -+ * Acquire the console when it is released. Also acquire the console when -+ * the current owner has a lower priority and the console is in a safe state. -+ * -+ * Return: 0 on success. Otherwise, an error code on failure. Also @cur -+ * is updated to the latest state when failed to modify it. -+ * -+ * Errors: -+ * -+ * -EPERM: A panic is in progress and this is not the panic CPU. -+ * Or the current owner or waiter has the same or higher -+ * priority. No acquire method can be successful in -+ * this case. -+ * -+ * -EBUSY: The current owner has a lower priority but the console -+ * in an unsafe state. The caller should try using -+ * the handover acquire method. -+ */ -+static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, -+ struct nbcon_state *cur) ++void __printk_deferred_exit(void) +{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state new; ++ cant_migrate(); ++ this_cpu_dec(printk_context); ++} + -+ do { -+ if (other_cpu_in_panic()) -+ return -EPERM; + asmlinkage int vprintk(const char *fmt, va_list args) + { + #ifdef CONFIG_KGDB_KDB +-- +2.51.0 + +From 21f681784e9f904447f80680d6251be0392fb30f Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 13 Sep 2023 08:35:23 +0000 +Subject: [PATCH 125/213] printk: nbcon: Implement processing in port->lock + wrapper + +Currently the port->lock wrappers uart_port_lock(), +uart_port_unlock() (and their variants) only lock/unlock +the spin_lock. + +If the port is an nbcon console, the wrappers must also +acquire/release the console and mark the region as unsafe. This +allows general port->lock synchronization to be synchronized +with the nbcon console ownership. 
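+
+As an illustration only (not part of this change, and assuming a valid
+struct uart_port *port), driver code that already uses the wrappers
+gets this synchronization for free:
+
+	unsigned long flags;
+
+	uart_port_lock_irqsave(port, &flags);
+	/* hardware access here also excludes nbcon console printing */
+	uart_port_unlock_irqrestore(port, flags);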
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 2 + + include/linux/printk.h | 13 +++++++ + include/linux/serial_core.h | 18 ++++++++- + kernel/printk/nbcon.c | 77 +++++++++++++++++++++++++++++++++++++ + 4 files changed, 108 insertions(+), 2 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 69242c2593d2..6a657533aae1 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -304,6 +304,7 @@ struct nbcon_write_context { + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer ++ * @locked_port: True, if the port lock is locked by nbcon + */ + struct console { + char name[16]; +@@ -330,6 +331,7 @@ struct console { + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; ++ bool locked_port; + }; + + #ifdef CONFIG_LOCKDEP +diff --git a/include/linux/printk.h b/include/linux/printk.h +index d2ffe2b00e00..cdb5e352ffb4 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -9,6 +9,8 @@ + #include + #include + ++struct uart_port; + -+ if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) -+ return -EPERM; + extern const char linux_banner[]; + extern const char linux_proc_banner[]; + +@@ -195,6 +197,8 @@ void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; + extern asmlinkage void dump_stack(void) __cold; + void printk_trigger_flush(void); ++extern void nbcon_acquire(struct uart_port *up); ++extern void nbcon_release(struct uart_port *up); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -274,6 +278,15 @@ static inline void dump_stack(void) + static inline void printk_trigger_flush(void) + { + } + -+ if (cur->unsafe) -+ return -EBUSY; ++static inline void nbcon_acquire(struct uart_port *up) ++{ ++} + -+ /* -+ * The console should never be safe for a direct acquire -+ * if an unsafe hostile takeover has ever happened. 
-+ */ -+ WARN_ON_ONCE(cur->unsafe_takeover); ++static inline void nbcon_release(struct uart_port *up) ++{ ++} + -+ new.atom = cur->atom; -+ new.prio = ctxt->prio; -+ new.req_prio = NBCON_PRIO_NONE; -+ new.unsafe = cur->unsafe_takeover; -+ new.cpu = cpu; + #endif + + #ifdef CONFIG_SMP +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index 71d925e8a79b..6fc2003767a9 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -596,6 +596,7 @@ struct uart_port { + static inline void uart_port_lock(struct uart_port *up) + { + spin_lock(&up->lock); ++ nbcon_acquire(up); + } + + /** +@@ -605,6 +606,7 @@ static inline void uart_port_lock(struct uart_port *up) + static inline void uart_port_lock_irq(struct uart_port *up) + { + spin_lock_irq(&up->lock); ++ nbcon_acquire(up); + } + + /** +@@ -615,6 +617,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) + static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) + { + spin_lock_irqsave(&up->lock, *flags); ++ nbcon_acquire(up); + } + + /** +@@ -625,7 +628,11 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f + */ + static inline bool uart_port_trylock(struct uart_port *up) + { +- return spin_trylock(&up->lock); ++ if (!spin_trylock(&up->lock)) ++ return false; + -+ } while (!nbcon_state_try_cmpxchg(con, cur, &new)); ++ nbcon_acquire(up); ++ return true; + } + + /** +@@ -637,7 +644,11 @@ static inline bool uart_port_trylock(struct uart_port *up) + */ + static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) + { +- return spin_trylock_irqsave(&up->lock, *flags); ++ if (!spin_trylock_irqsave(&up->lock, *flags)) ++ return false; + -+ return 0; -+} ++ nbcon_acquire(up); ++ return true; + } + + /** +@@ -646,6 +657,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long + */ + static inline void uart_port_unlock(struct uart_port *up) + { ++ nbcon_release(up); + spin_unlock(&up->lock); + } + +@@ -655,6 +667,7 @@ static inline void uart_port_unlock(struct uart_port *up) + */ + static inline void uart_port_unlock_irq(struct uart_port *up) + { ++ nbcon_release(up); + spin_unlock_irq(&up->lock); + } + +@@ -665,6 +678,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) + */ + static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) + { ++ nbcon_release(up); + spin_unlock_irqrestore(&up->lock, flags); + } + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 8ecd76aa22e6..a5707fe1e95e 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include "internal.h" + /* + * Printk console printing implementation for consoles which does not depend +@@ -995,3 +996,79 @@ void nbcon_free(struct console *con) + + con->pbufs = NULL; + } + -+static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) ++static inline bool uart_is_nbcon(struct uart_port *up) +{ -+ /* -+ * The request context is well defined by the @req_prio because: -+ * -+ * - Only a context with a higher priority can take over the request. -+ * - There are only three priorities. -+ * - Only one CPU is allowed to request PANIC priority. -+ * - Lower priorities are ignored during panic() until reboot. -+ * -+ * As a result, the following scenario is *not* possible: -+ * -+ * 1. Another context with a higher priority directly takes ownership. -+ * 2. 
The higher priority context releases the ownership. -+ * 3. A lower priority context takes the ownership. -+ * 4. Another context with the same priority as this context -+ * creates a request and starts waiting. -+ */ ++ int cookie; ++ bool ret; + -+ return (cur->req_prio == expected_prio); ++ if (!uart_console(up)) ++ return false; ++ ++ cookie = console_srcu_read_lock(); ++ ret = (console_srcu_read_flags(up->cons) & CON_NBCON); ++ console_srcu_read_unlock(cookie); ++ return ret; +} + +/** -+ * nbcon_context_try_acquire_requested - Try to acquire after having -+ * requested a handover -+ * @ctxt: The context of the caller -+ * @cur: The current console state -+ * -+ * This is a helper function for nbcon_context_try_acquire_handover(). -+ * It is called when the console is in an unsafe state. The current -+ * owner will release the console on exit from the unsafe region. -+ * -+ * Return: 0 on success and @cur is updated to the new console state. -+ * Otherwise an error code on failure. -+ * -+ * Errors: -+ * -+ * -EPERM: A panic is in progress and this is not the panic CPU -+ * or this context is no longer the waiter. ++ * nbcon_acquire - The second half of the port locking wrapper ++ * @up: The uart port whose @lock was locked + * -+ * -EBUSY: The console is still locked. The caller should -+ * continue waiting. ++ * The uart_port_lock() wrappers will first lock the spin_lock @up->lock. ++ * Then this function is called to implement nbcon-specific processing. + * -+ * Note: The caller must still remove the request when an error has occurred -+ * except when this context is no longer the waiter. ++ * If @up is an nbcon console, this console will be acquired and marked as ++ * unsafe. Otherwise this function does nothing. + */ -+static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, -+ struct nbcon_state *cur) ++void nbcon_acquire(struct uart_port *up) +{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state new; -+ -+ /* Note that the caller must still remove the request! */ -+ if (other_cpu_in_panic()) -+ return -EPERM; -+ -+ /* -+ * Note that the waiter will also change if there was an unsafe -+ * hostile takeover. -+ */ -+ if (!nbcon_waiter_matches(cur, ctxt->prio)) -+ return -EPERM; ++ struct console *con = up->cons; ++ struct nbcon_context ctxt; + -+ /* If still locked, caller should continue waiting. */ -+ if (cur->prio != NBCON_PRIO_NONE) -+ return -EBUSY; ++ if (!uart_is_nbcon(up)) ++ return; + -+ /* -+ * The previous owner should have never released ownership -+ * in an unsafe region. -+ */ -+ WARN_ON_ONCE(cur->unsafe); ++ WARN_ON_ONCE(con->locked_port); + -+ new.atom = cur->atom; -+ new.prio = ctxt->prio; -+ new.req_prio = NBCON_PRIO_NONE; -+ new.unsafe = cur->unsafe_takeover; -+ new.cpu = cpu; ++ do { ++ do { ++ memset(&ctxt, 0, sizeof(ctxt)); ++ ctxt.console = con; ++ ctxt.prio = NBCON_PRIO_NORMAL; ++ } while (!nbcon_context_try_acquire(&ctxt)); + -+ if (!nbcon_state_try_cmpxchg(con, cur, &new)) { -+ /* -+ * The acquire could fail only when it has been taken -+ * over by a higher priority context. -+ */ -+ WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); -+ return -EPERM; -+ } ++ } while (!nbcon_context_enter_unsafe(&ctxt)); + -+ /* Handover success. This context now owns the console. 
*/ -+ return 0; ++ con->locked_port = true; +} ++EXPORT_SYMBOL_GPL(nbcon_acquire); + +/** -+ * nbcon_context_try_acquire_handover - Try to acquire via handover -+ * @ctxt: The context of the caller -+ * @cur: The current console state -+ * -+ * The function must be called only when the context has higher priority -+ * than the current owner and the console is in an unsafe state. -+ * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. -+ * -+ * The function sets "req_prio" field to make the current owner aware of -+ * the request. Then it waits until the current owner releases the console, -+ * or an even higher context takes over the request, or timeout expires. -+ * -+ * The current owner checks the "req_prio" field on exit from the unsafe -+ * region and releases the console. It does not touch the "req_prio" field -+ * so that the console stays reserved for the waiter. -+ * -+ * Return: 0 on success. Otherwise, an error code on failure. Also @cur -+ * is updated to the latest state when failed to modify it. -+ * -+ * Errors: ++ * nbcon_release - The first half of the port unlocking wrapper ++ * @up: The uart port whose @lock is about to be unlocked + * -+ * -EPERM: A panic is in progress and this is not the panic CPU. -+ * Or a higher priority context has taken over the -+ * console or the handover request. ++ * The uart_port_unlock() wrappers will first call this function to implement ++ * nbcon-specific processing. Then afterwards the uart_port_unlock() wrappers ++ * will unlock the spin_lock @up->lock. + * -+ * -EBUSY: The current owner is on the same CPU so that the hand -+ * shake could not work. Or the current owner is not -+ * willing to wait (zero timeout). Or the console does -+ * not enter the safe state before timeout passed. The -+ * caller might still use the unsafe hostile takeover -+ * when allowed. ++ * If @up is an nbcon console, the console will be marked as safe and ++ * released. Otherwise this function does nothing. ++ */ ++void nbcon_release(struct uart_port *up) ++{ ++ struct console *con = up->cons; ++ struct nbcon_context ctxt = { ++ .console = con, ++ .prio = NBCON_PRIO_NORMAL, ++ }; ++ ++ if (!con->locked_port) ++ return; ++ ++ if (nbcon_context_exit_unsafe(&ctxt)) ++ nbcon_context_release(&ctxt); ++ ++ con->locked_port = false; ++} ++EXPORT_SYMBOL_GPL(nbcon_release); +-- +2.51.0 + +From 71ee9faa67972442cdb12785b91ad93168316220 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 8 Dec 2023 15:54:27 +0000 +Subject: [PATCH 126/213] printk: nbcon: Add driver_enter/driver_exit console + callbacks + +Console drivers need some mechanism to synchronize between "normal +driver activity" and console printing. For uart serial drivers it +is the port lock. Other types of console drivers (network, +graphics, USB) will need something as well. + +Provide 2 new mandatory nbcon console callbacks (driver_enter and +driver_exit) to allow the consoles drivers to implement the +appropriate synchronization. The callbacks are also expected to +disable/enable migration. 
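+
+For illustration only, a serial console could wire these callbacks to
+its existing port lock roughly as sketched below; up_from_con() is a
+hypothetical helper that maps the console to its uart_port and is not
+part of this change:
+
+	static void serial_con_driver_enter(struct console *con, unsigned long *flags)
+	{
+		struct uart_port *up = up_from_con(con);	/* hypothetical helper */
+
+		/* disabling interrupts also disables migration */
+		spin_lock_irqsave(&up->lock, *flags);
+	}
+
+	static void serial_con_driver_exit(struct console *con, unsigned long flags)
+	{
+		struct uart_port *up = up_from_con(con);	/* hypothetical helper */
+
+		spin_unlock_irqrestore(&up->lock, flags);
+	}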
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 6a657533aae1..4c2bebe734bf 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -301,6 +301,8 @@ struct nbcon_write_context { + * @node: hlist node for the console list + * + * @write_atomic: Write callback for atomic context ++ * @driver_enter: Callback to begin synchronization with driver code ++ * @driver_exit: Callback to finish synchronization with driver code + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer +@@ -328,6 +330,8 @@ struct console { + /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); ++ void (*driver_enter)(struct console *con, unsigned long *flags); ++ void (*driver_exit)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; +-- +2.51.0 + +From 9370e70a3e9cb51a4bc61b1034a32d6979466a3f Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 12 Sep 2023 13:25:41 +0000 +Subject: [PATCH 127/213] printk: Make console_is_usable() available to nbcon + +Move console_is_usable() as-is into internal.h so that it can +be used by nbcon printing functions as well. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 32 ++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 30 ------------------------------ + 2 files changed, 32 insertions(+), 30 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index ac2d9750e5f8..378ccca007ca 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -78,6 +78,36 @@ bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); + void nbcon_free(struct console *con); + ++/* ++ * Check if the given console is currently capable and allowed to print ++ * records. + * -+ * -EAGAIN: @cur has changed when creating the handover request. -+ * The caller should retry with direct acquire. ++ * Requires the console_srcu_read_lock. + */ -+static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, -+ struct nbcon_state *cur) ++static inline bool console_is_usable(struct console *con) +{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state new; -+ int timeout; -+ int request_err = -EBUSY; ++ short flags = console_srcu_read_flags(con); ++ ++ if (!(flags & CON_ENABLED)) ++ return false; ++ ++ if ((flags & CON_SUSPENDED)) ++ return false; ++ ++ if (!con->write) ++ return false; + + /* -+ * Check that the handover is called when the direct acquire failed -+ * with -EBUSY. ++ * Console drivers may assume that per-cpu resources have been ++ * allocated. So unless they're explicitly marked as being able to ++ * cope (CON_ANYTIME) don't call them until this CPU is officially up. 
+ */ -+ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); -+ WARN_ON_ONCE(!cur->unsafe); ++ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) ++ return false; ++ ++ return true; ++} ++ + #else + + #define PRINTK_PREFIX_MAX 0 +@@ -99,6 +129,8 @@ static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } + ++static inline bool console_is_usable(struct console *con) { return false; } ++ + #endif /* CONFIG_PRINTK */ + + extern struct printk_buffers printk_shared_pbufs; +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 6c6e1f8060af..6649d634aa0e 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2701,36 +2701,6 @@ int is_console_locked(void) + } + EXPORT_SYMBOL(is_console_locked); + +-/* +- * Check if the given console is currently capable and allowed to print +- * records. +- * +- * Requires the console_srcu_read_lock. +- */ +-static inline bool console_is_usable(struct console *con) +-{ +- short flags = console_srcu_read_flags(con); +- +- if (!(flags & CON_ENABLED)) +- return false; +- +- if ((flags & CON_SUSPENDED)) +- return false; +- +- if (!con->write) +- return false; +- +- /* +- * Console drivers may assume that per-cpu resources have been +- * allocated. So unless they're explicitly marked as being able to +- * cope (CON_ANYTIME) don't call them until this CPU is officially up. +- */ +- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) +- return false; +- +- return true; +-} +- + static void __console_unlock(void) + { + console_locked = 0; +-- +2.51.0 + +From 4f40e2bedd0d5199d593adcf504e9fbbca65d051 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 12 Sep 2023 13:53:21 +0000 +Subject: [PATCH 128/213] printk: Let console_is_usable() handle nbcon + +The nbcon consoles use a different printing callback. For nbcon +consoles, check for the write_atomic() callback instead of +write(). + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 378ccca007ca..d741d19bb9db 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -80,6 +80,8 @@ void nbcon_free(struct console *con); + + /* + * Check if the given console is currently capable and allowed to print ++ * records. Note that this function does not consider the current context, ++ * which can also play a role in deciding if @con can be used to print + * records. + * + * Requires the console_srcu_read_lock. +@@ -94,8 +96,13 @@ static inline bool console_is_usable(struct console *con) + if ((flags & CON_SUSPENDED)) + return false; + +- if (!con->write) +- return false; ++ if (flags & CON_NBCON) { ++ if (!con->write_atomic) ++ return false; ++ } else { ++ if (!con->write) ++ return false; ++ } + + /* + * Console drivers may assume that per-cpu resources have been +-- +2.51.0 + +From 7de4222a4297f2027627a61e7551bb418681c19e Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 12 Sep 2023 13:45:33 +0000 +Subject: [PATCH 129/213] printk: Add @flags argument for console_is_usable() + +The caller of console_is_usable() usually needs @console->flags +for its own checks. Rather than having console_is_usable() read +its own copy, make the caller pass in the @flags. 
This also +ensures that the caller saw the same @flags value. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 8 ++------ + kernel/printk/printk.c | 5 +++-- + 2 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index d741d19bb9db..cd1ce0235f01 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -83,13 +83,9 @@ void nbcon_free(struct console *con); + * records. Note that this function does not consider the current context, + * which can also play a role in deciding if @con can be used to print + * records. +- * +- * Requires the console_srcu_read_lock. + */ +-static inline bool console_is_usable(struct console *con) ++static inline bool console_is_usable(struct console *con, short flags) + { +- short flags = console_srcu_read_flags(con); +- + if (!(flags & CON_ENABLED)) + return false; + +@@ -136,7 +132,7 @@ static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } + +-static inline bool console_is_usable(struct console *con) { return false; } ++static inline bool console_is_usable(struct console *con, short flags) { return false; } + + #endif /* CONFIG_PRINTK */ + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 6649d634aa0e..1fea8409e99d 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2946,9 +2946,10 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); + bool progress; + +- if (!console_is_usable(con)) ++ if (!console_is_usable(con, flags)) + continue; + any_usable = true; + +@@ -3815,7 +3816,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + * that they make forward progress, so only increment + * @diff for usable consoles. + */ +- if (!console_is_usable(c)) ++ if (!console_is_usable(c, flags)) + continue; + + if (flags & CON_NBCON) { +-- +2.51.0 + +From 1d15c88aff35489ef1d04b394d7d80cfdced0f1a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 12 Sep 2023 12:00:08 +0000 +Subject: [PATCH 130/213] printk: nbcon: Provide function to flush using + write_atomic() + +Provide nbcon_atomic_flush_all() to perform flushing of all +registered nbcon consoles using their write_atomic() callback. +Like with legacy consoles, the nbcon consoles are flushed one +record per console. This allows all nbcon consoles to print +lines pseudo-simultaneously, rather than one console waiting +for the full ringbuffer to dump to another console before +printing anything. + +Unlike console_flush_all(), nbcon_atomic_flush_all() will only +flush up through the newest record at the time of the call. +This prevents a CPU from printing unbounded when other CPUs are +adding records. + +Perform nbcon console atomic flushing in +console_flush_on_panic(). This function is not only used in +panic() but also other locations where there may be stored +messages that need to be flushed. 
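+
+The bounded behaviour follows from how the cutoff is taken. As the
+hunks below show, the newest reserved sequence number is sampled once
+and flushing never goes past it:
+
+	void nbcon_atomic_flush_all(void)
+	{
+		__nbcon_atomic_flush_all(prb_next_reserve_seq(prb));
+	}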
+ +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 2 + + kernel/printk/nbcon.c | 100 ++++++++++++++++++++++++++++++++++++++- + kernel/printk/printk.c | 2 + + 3 files changed, 102 insertions(+), 2 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index cd1ce0235f01..d6cb8d2be944 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -77,6 +77,7 @@ void nbcon_seq_force(struct console *con, u64 seq); + bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); + void nbcon_free(struct console *con); ++void nbcon_atomic_flush_all(void); + + /* + * Check if the given console is currently capable and allowed to print +@@ -131,6 +132,7 @@ static inline void nbcon_seq_force(struct console *con, u64 seq) { } + static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } ++static inline void nbcon_atomic_flush_all(void) { } + + static inline bool console_is_usable(struct console *con, short flags) { return false; } + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index a5707fe1e95e..d0780168e319 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -539,7 +539,6 @@ static struct printk_buffers panic_nbcon_pbufs; + * in an unsafe state. Otherwise, on success the caller may assume + * the console is not in an unsafe state. + */ +-__maybe_unused + static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) + { + unsigned int cpu = smp_processor_id(); +@@ -841,7 +840,6 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + * When true is returned, @wctxt->ctxt.backlog indicates whether there are + * still records pending in the ringbuffer, + */ +-__maybe_unused + static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + { + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); +@@ -930,6 +928,104 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + return nbcon_context_exit_unsafe(ctxt); + } + ++/** ++ * nbcon_atomic_emit_one - Print one record for an nbcon console using the ++ * write_atomic() callback ++ * @wctxt: An initialized write context struct to use ++ * for this context ++ * ++ * Return: False if the given console could not print a record or there ++ * are no more records to print, otherwise true. ++ * ++ * This is an internal helper to handle the locking of the console before ++ * calling nbcon_emit_next_record(). ++ */ ++static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) ++{ ++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + -+ /* Handover is not possible on the same CPU. */ -+ if (cur->cpu == cpu) -+ return -EBUSY; ++ if (!nbcon_context_try_acquire(ctxt)) ++ return false; + + /* -+ * Console stays unsafe after an unsafe takeover until re-initialized. -+ * Waiting is not going to help in this case. ++ * nbcon_emit_next_record() returns false when the console was ++ * handed over or taken over. In both cases the context is no ++ * longer valid. + */ -+ if (cur->unsafe_takeover) -+ return -EBUSY; ++ if (!nbcon_emit_next_record(wctxt)) ++ return false; + -+ /* Is the caller willing to wait? */ -+ if (ctxt->spinwait_max_us == 0) -+ return -EBUSY; ++ nbcon_context_release(ctxt); + -+ /* -+ * Setup a request for the handover. 
The caller should try to acquire -+ * the console directly when the current state has been modified. -+ */ -+ new.atom = cur->atom; -+ new.req_prio = ctxt->prio; -+ if (!nbcon_state_try_cmpxchg(con, cur, &new)) -+ return -EAGAIN; ++ return ctxt->backlog; ++} + -+ cur->atom = new.atom; ++/** ++ * __nbcon_atomic_flush_all - Flush all nbcon consoles using their ++ * write_atomic() callback ++ * @stop_seq: Flush up until this record ++ */ ++static void __nbcon_atomic_flush_all(u64 stop_seq) ++{ ++ struct nbcon_write_context wctxt = { }; ++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); ++ struct console *con; ++ bool any_progress; ++ int cookie; + -+ /* Wait until there is no owner and then acquire the console. */ -+ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { -+ /* On successful acquire, this request is cleared. */ -+ request_err = nbcon_context_try_acquire_requested(ctxt, cur); -+ if (!request_err) -+ return 0; ++ do { ++ any_progress = false; + -+ /* -+ * If the acquire should be aborted, it must be ensured -+ * that the request is removed before returning to caller. -+ */ -+ if (request_err == -EPERM) -+ break; ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); ++ unsigned long irq_flags; + -+ udelay(1); ++ if (!(flags & CON_NBCON)) ++ continue; + -+ /* Re-read the state because some time has passed. */ -+ nbcon_state_read(con, cur); -+ } ++ if (!console_is_usable(con, flags)) ++ continue; + -+ /* Timed out or aborted. Carefully remove handover request. */ -+ do { -+ /* -+ * No need to remove request if there is a new waiter. This -+ * can only happen if a higher priority context has taken over -+ * the console or the handover request. -+ */ -+ if (!nbcon_waiter_matches(cur, ctxt->prio)) -+ return -EPERM; ++ if (nbcon_seq_read(con) >= stop_seq) ++ continue; ++ ++ memset(ctxt, 0, sizeof(*ctxt)); ++ ctxt->console = con; ++ ctxt->spinwait_max_us = 2000; ++ ctxt->prio = NBCON_PRIO_NORMAL; + -+ /* Unset request for handover. */ -+ new.atom = cur->atom; -+ new.req_prio = NBCON_PRIO_NONE; -+ if (nbcon_state_try_cmpxchg(con, cur, &new)) { + /* -+ * Request successfully unset. Report failure of -+ * acquiring via handover. ++ * Atomic flushing does not use console driver ++ * synchronization (i.e. it does not hold the port ++ * lock for uart consoles). Therefore IRQs must be ++ * disabled to avoid being interrupted and then ++ * calling into a driver that will deadlock trying ++ * acquire console ownership. + */ -+ cur->atom = new.atom; -+ return request_err; -+ } ++ local_irq_save(irq_flags); + -+ /* -+ * Unable to remove request. Try to acquire in case -+ * the owner has released the lock. -+ */ -+ } while (nbcon_context_try_acquire_requested(ctxt, cur)); ++ any_progress |= nbcon_atomic_emit_one(&wctxt); + -+ /* Lucky timing. The acquire succeeded while removing the request. */ -+ return 0; ++ local_irq_restore(irq_flags); ++ } ++ console_srcu_read_unlock(cookie); ++ } while (any_progress); +} + +/** -+ * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover -+ * @ctxt: The context of the caller -+ * @cur: The current console state -+ * -+ * Acquire the console even in the unsafe state. -+ * -+ * It can be permitted by setting the 'allow_unsafe_takeover' field only -+ * by the final attempt to flush messages in panic(). ++ * nbcon_atomic_flush_all - Flush all nbcon consoles using their ++ * write_atomic() callback + * -+ * Return: 0 on success. 
-EPERM when not allowed by the context. ++ * Flush the backlog up through the currently newest record. Any new ++ * records added while flushing will not be flushed. This is to avoid ++ * one CPU printing unbounded because other CPUs continue to add records. + */ -+static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, -+ struct nbcon_state *cur) ++void nbcon_atomic_flush_all(void) +{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state new; ++ __nbcon_atomic_flush_all(prb_next_reserve_seq(prb)); ++} + -+ if (!ctxt->allow_unsafe_takeover) -+ return -EPERM; + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 1fea8409e99d..e24fe418c69b 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3186,6 +3186,8 @@ void console_flush_on_panic(enum con_flush_mode mode) + console_srcu_read_unlock(cookie); + } + ++ nbcon_atomic_flush_all(); + -+ /* Ensure caller is allowed to perform unsafe hostile takeovers. */ -+ if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) -+ return -EPERM; + console_flush_all(false, &next_seq, &handover); + } + +-- +2.51.0 + +From 09fddfe435b08018bc3dd91bb775e057ffe2bcb8 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 24 Oct 2023 14:13:14 +0000 +Subject: [PATCH 131/213] printk: Track registered boot consoles + +Unfortunately it is not known if a boot console and a regular +(legacy or nbcon) console use the same hardware. For this reason +they must not be allowed to print simultaneously. + +For legacy consoles this is not an issue because they are +already synchronized with the boot consoles using the console +lock. However nbcon consoles can be triggered separately. + +Add a global flag @have_boot_console to identify if any boot +consoles are registered. This will be used in follow-up commits +to ensure that boot consoles and nbcon consoles cannot print +simultaneously. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index e24fe418c69b..ce09dd948453 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -463,6 +463,14 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * Specifies if a boot console is registered. If boot consoles are present, ++ * nbcon consoles cannot print simultaneously and must be synchronized by ++ * the console lock. This is because boot consoles and nbcon consoles may ++ * have mapped the same hardware. ++ */ ++bool have_boot_console; ++ + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ +@@ -3531,6 +3539,9 @@ void register_console(struct console *newcon) + if (newcon->flags & CON_NBCON) + nbcon_init(newcon); + ++ if (newcon->flags & CON_BOOT) ++ have_boot_console = true; + + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. +@@ -3583,6 +3594,8 @@ EXPORT_SYMBOL(register_console); + /* Must be called under console_list_lock(). 
*/ + static int unregister_console_locked(struct console *console) + { ++ bool found_boot_con = false; ++ struct console *c; + int res; + + lockdep_assert_console_list_lock_held(); +@@ -3630,6 +3643,17 @@ static int unregister_console_locked(struct console *console) + if (console->exit) + res = console->exit(console); + + /* -+ * Check that try_acquire_direct() and try_acquire_handover() returned -+ * -EBUSY in the right situation. ++ * With this console gone, the global flags tracking registered ++ * console types may have changed. Update them. + */ -+ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); -+ WARN_ON_ONCE(cur->unsafe != true); -+ -+ do { -+ new.atom = cur->atom; -+ new.cpu = cpu; -+ new.prio = ctxt->prio; -+ new.unsafe |= cur->unsafe_takeover; -+ new.unsafe_takeover |= cur->unsafe; -+ -+ } while (!nbcon_state_try_cmpxchg(con, cur, &new)); -+ -+ return 0; -+} -+ -+static struct printk_buffers panic_nbcon_pbufs; ++ for_each_console(c) { ++ if (c->flags & CON_BOOT) ++ found_boot_con = true; ++ } ++ if (!found_boot_con) ++ have_boot_console = false; + + return res; + } + +-- +2.51.0 + +From ac1bcfdf6d617d2ae2806faeade9321a4c1b45bf Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 19 Sep 2023 14:33:27 +0000 +Subject: [PATCH 132/213] printk: nbcon: Use nbcon consoles in + console_flush_all() + +Allow nbcon consoles to print messages in the printk() caller +context by integrating them into console_flush_all(). The +write_atomic() callback is used for printing. + +Provide nbcon_console_emit_next_record(), which acts as the +nbcon variant of console_emit_next_record(). Call this variant +within console_flush_all() for nbcon consoles. Since nbcon +consoles use their own @nbcon_seq variable to track the next +record to print, this also must be appropriately handled. 
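+
+The core of the integration is the per-console dispatch in
+console_flush_all(), reproduced here from the hunk below:
+
+	if (flags & CON_NBCON) {
+		progress = nbcon_atomic_emit_next_record(con, handover, cookie);
+		printk_seq = nbcon_seq_read(con);
+	} else {
+		progress = console_emit_next_record(con, handover, cookie);
+		printk_seq = con->seq;
+	}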
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 5 ++++ + kernel/printk/nbcon.c | 50 ++++++++++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 19 +++++++++++---- + 3 files changed, 69 insertions(+), 5 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index d6cb8d2be944..e7cdea097cec 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -71,6 +71,8 @@ void defer_console_output(void); + + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); ++void console_lock_spinning_enable(void); ++int console_lock_spinning_disable_and_check(int cookie); + + u64 nbcon_seq_read(struct console *con); + void nbcon_seq_force(struct console *con, u64 seq); +@@ -78,6 +80,7 @@ bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); + void nbcon_free(struct console *con); + void nbcon_atomic_flush_all(void); ++bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); + + /* + * Check if the given console is currently capable and allowed to print +@@ -133,6 +136,8 @@ static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } + static inline void nbcon_atomic_flush_all(void) { } ++static inline bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, ++ int cookie) { return false; } + + static inline bool console_is_usable(struct console *con, short flags) { return false; } + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index d0780168e319..c17cf3ea6153 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -532,6 +532,7 @@ static struct printk_buffers panic_nbcon_pbufs; + * nbcon_context_try_acquire - Try to acquire nbcon console + * @ctxt: The context of the caller + * ++ * Context: Any context which could not be migrated to another CPU. + * Return: True if the console was acquired. False otherwise. + * + * If the caller allowed an unsafe hostile takeover, on success the +@@ -960,6 +961,55 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) + return ctxt->backlog; + } + +/** -+ * nbcon_context_try_acquire - Try to acquire nbcon console -+ * @ctxt: The context of the caller ++ * nbcon_atomic_emit_next_record - Print one record for an nbcon console ++ * using the write_atomic() callback ++ * @con: The console to print on ++ * @handover: Will be set to true if a printk waiter has taken over the ++ * console_lock, in which case the caller is no longer holding ++ * both the console_lock and the SRCU read lock. Otherwise it ++ * is set to false. ++ * @cookie: The cookie from the SRCU read lock. + * + * Context: Any context which could not be migrated to another CPU. -+ * Return: True if the console was acquired. False otherwise. ++ * Return: True if a record could be printed, otherwise false. + * -+ * If the caller allowed an unsafe hostile takeover, on success the -+ * caller should check the current console state to see if it is -+ * in an unsafe state. Otherwise, on success the caller may assume -+ * the console is not in an unsafe state. ++ * This function is meant to be called by console_flush_all() to print records ++ * on nbcon consoles using the write_atomic() callback. Essentially it is the ++ * nbcon version of console_emit_next_record(). 
+ */ -+static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) ++bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie) +{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state cur; -+ int err; -+ -+ nbcon_state_read(con, &cur); -+try_again: -+ err = nbcon_context_try_acquire_direct(ctxt, &cur); -+ if (err != -EBUSY) -+ goto out; -+ -+ err = nbcon_context_try_acquire_handover(ctxt, &cur); -+ if (err == -EAGAIN) -+ goto try_again; -+ if (err != -EBUSY) -+ goto out; -+ -+ err = nbcon_context_try_acquire_hostile(ctxt, &cur); -+out: -+ if (err) -+ return false; ++ struct nbcon_write_context wctxt = { }; ++ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); ++ unsigned long driver_flags; ++ bool progress = false; ++ unsigned long flags; + -+ /* Acquire succeeded. */ ++ *handover = false; + -+ /* Assign the appropriate buffer for this context. */ -+ if (atomic_read(&panic_cpu) == cpu) -+ ctxt->pbufs = &panic_nbcon_pbufs; -+ else -+ ctxt->pbufs = con->pbufs; ++ /* Use the same locking order as console_emit_next_record(). */ ++ printk_safe_enter_irqsave(flags); ++ console_lock_spinning_enable(); ++ stop_critical_timings(); + -+ /* Set the record sequence for this context to print. */ -+ ctxt->seq = nbcon_seq_read(ctxt->console); ++ con->driver_enter(con, &driver_flags); ++ cant_migrate(); + -+ return true; -+} ++ ctxt->console = con; ++ ctxt->prio = NBCON_PRIO_NORMAL; + -+static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, -+ int expected_prio) -+{ -+ /* -+ * Since consoles can only be acquired by higher priorities, -+ * owning contexts are uniquely identified by @prio. However, -+ * since contexts can unexpectedly lose ownership, it is -+ * possible that later another owner appears with the same -+ * priority. For this reason @cpu is also needed. -+ */ ++ progress = nbcon_atomic_emit_one(&wctxt); + -+ if (cur->prio != expected_prio) -+ return false; ++ con->driver_exit(con, driver_flags); + -+ if (cur->cpu != expected_cpu) -+ return false; ++ start_critical_timings(); ++ *handover = console_lock_spinning_disable_and_check(cookie); ++ printk_safe_exit_irqrestore(flags); + -+ return true; ++ return progress; +} + -+/** -+ * nbcon_context_release - Release the console -+ * @ctxt: The nbcon context from nbcon_context_try_acquire() -+ */ -+static void nbcon_context_release(struct nbcon_context *ctxt) -+{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct nbcon_state cur; -+ struct nbcon_state new; -+ -+ nbcon_state_read(con, &cur); -+ -+ do { -+ if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) -+ break; -+ -+ new.atom = cur.atom; -+ new.prio = NBCON_PRIO_NONE; -+ -+ /* -+ * If @unsafe_takeover is set, it is kept set so that -+ * the state remains permanently unsafe. -+ */ -+ new.unsafe |= cur.unsafe_takeover; -+ -+ } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); + /** + * __nbcon_atomic_flush_all - Flush all nbcon consoles using their + * write_atomic() callback +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index ce09dd948453..575b3425a86a 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1872,7 +1872,7 @@ static bool console_waiter; + * there may be a waiter spinning (like a spinlock). Also it must be + * ready to hand over the lock at the end of the section. + */ +-static void console_lock_spinning_enable(void) ++void console_lock_spinning_enable(void) + { + /* + * Do not use spinning in panic(). 
The panic CPU wants to keep the lock. +@@ -1911,7 +1911,7 @@ static void console_lock_spinning_enable(void) + * + * Return: 1 if the lock rights were passed, 0 otherwise. + */ +-static int console_lock_spinning_disable_and_check(int cookie) ++int console_lock_spinning_disable_and_check(int cookie) + { + int waiter; + +@@ -2955,13 +2955,22 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); ++ u64 printk_seq; + bool progress; + + if (!console_is_usable(con, flags)) + continue; + any_usable = true; + +- progress = console_emit_next_record(con, handover, cookie); ++ if (flags & CON_NBCON) { ++ progress = nbcon_atomic_emit_next_record(con, handover, cookie); + -+ ctxt->pbufs = NULL; -+} ++ printk_seq = nbcon_seq_read(con); ++ } else { ++ progress = console_emit_next_record(con, handover, cookie); + ++ printk_seq = con->seq; ++ } + + /* + * If a handover has occurred, the SRCU read lock +@@ -2971,8 +2980,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + return false; + + /* Track the next of the highest seq flushed. */ +- if (con->seq > *next_seq) +- *next_seq = con->seq; ++ if (printk_seq > *next_seq) ++ *next_seq = printk_seq; + + if (!progress) + continue; +-- +2.51.0 + +From 51a9706d6947d680339b8b1318bbce76b20ac7ae Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Thu, 14 Dec 2023 14:38:42 +0000 +Subject: [PATCH 133/213] printk: nbcon: Assign priority based on CPU state + +Use the current state of the CPU to determine which priority to +assign to the printing context. + +Note: The uart_port wrapper, which is responsible for non-console- + printing activities, will always use NORMAL priority. 
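+
+For illustration only (not part of this patch): with migration
+disabled, a flush path built on the new helper would look roughly
+like this, using the context names from the patch below:
+
+	/* Sketch: pick the priority matching this CPU's state. */
+	ctxt->prio = nbcon_get_default_prio();	/* PANIC or NORMAL */
+	any_progress |= nbcon_atomic_emit_one(&wctxt);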
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 2 ++ + kernel/printk/nbcon.c | 30 ++++++++++++++++++++++++++++-- + 2 files changed, 30 insertions(+), 2 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index e7cdea097cec..c4417fc48b7e 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -79,6 +79,7 @@ void nbcon_seq_force(struct console *con, u64 seq); + bool nbcon_alloc(struct console *con); + void nbcon_init(struct console *con); + void nbcon_free(struct console *con); ++enum nbcon_prio nbcon_get_default_prio(void); + void nbcon_atomic_flush_all(void); + bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); + +@@ -135,6 +136,7 @@ static inline void nbcon_seq_force(struct console *con, u64 seq) { } + static inline bool nbcon_alloc(struct console *con) { return false; } + static inline void nbcon_init(struct console *con) { } + static inline void nbcon_free(struct console *con) { } ++static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } + static inline void nbcon_atomic_flush_all(void) { } + static inline bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, + int cookie) { return false; } +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index c17cf3ea6153..3e999784ba5c 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -961,6 +961,22 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) + return ctxt->backlog; + } + +/** -+ * nbcon_context_can_proceed - Check whether ownership can proceed -+ * @ctxt: The nbcon context from nbcon_context_try_acquire() -+ * @cur: The current console state -+ * -+ * Return: True if this context still owns the console. False if -+ * ownership was handed over or taken. -+ * -+ * Must be invoked when entering the unsafe state to make sure that it still -+ * owns the lock. Also must be invoked when exiting the unsafe context -+ * to eventually free the lock for a higher priority context which asked -+ * for the friendly handover. -+ * -+ * It can be called inside an unsafe section when the console is just -+ * temporary in safe state instead of exiting and entering the unsafe -+ * state. -+ * -+ * Also it can be called in the safe context before doing an expensive -+ * safe operation. It does not make sense to do the operation when -+ * a higher priority context took the lock. ++ * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon ++ * printing on the current CPU + * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. ++ * Context: Any context which could not be migrated to another CPU. ++ * Return: The nbcon_prio to use for acquiring an nbcon console in this ++ * context for printing. + */ -+static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur) ++enum nbcon_prio nbcon_get_default_prio(void) +{ -+ unsigned int cpu = smp_processor_id(); -+ -+ /* Make sure this context still owns the console. */ -+ if (!nbcon_owner_matches(cur, cpu, ctxt->prio)) -+ return false; -+ -+ /* The console owner can proceed if there is no waiter. 
*/ -+ if (cur->req_prio == NBCON_PRIO_NONE) -+ return true; -+ -+ /* -+ * A console owner within an unsafe region is always allowed to -+ * proceed, even if there are waiters. It can perform a handover -+ * when exiting the unsafe region. Otherwise the waiter will -+ * need to perform an unsafe hostile takeover. -+ */ -+ if (cur->unsafe) -+ return true; ++ if (this_cpu_in_panic()) ++ return NBCON_PRIO_PANIC; + -+ /* Waiters always have higher priorities than owners. */ -+ WARN_ON_ONCE(cur->req_prio <= cur->prio); ++ return NBCON_PRIO_NORMAL; ++} + -+ /* -+ * Having a safe point for take over and eventually a few -+ * duplicated characters or a full line is way better than a -+ * hostile takeover. Post processing can take care of the garbage. -+ * Release and hand over. -+ */ -+ nbcon_context_release(ctxt); + /** + * nbcon_atomic_emit_next_record - Print one record for an nbcon console + * using the write_atomic() callback +@@ -997,7 +1013,7 @@ bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cook + cant_migrate(); + + ctxt->console = con; +- ctxt->prio = NBCON_PRIO_NORMAL; ++ ctxt->prio = nbcon_get_default_prio(); + + progress = nbcon_atomic_emit_one(&wctxt); + +@@ -1043,7 +1059,6 @@ static void __nbcon_atomic_flush_all(u64 stop_seq) + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->spinwait_max_us = 2000; +- ctxt->prio = NBCON_PRIO_NORMAL; + + /* + * Atomic flushing does not use console driver +@@ -1052,9 +1067,14 @@ static void __nbcon_atomic_flush_all(u64 stop_seq) + * disabled to avoid being interrupted and then + * calling into a driver that will deadlock trying + * acquire console ownership. ++ * ++ * This also disables migration in order to get the ++ * current CPU priority. + */ + local_irq_save(irq_flags); + ++ ctxt->prio = nbcon_get_default_prio(); + -+ /* -+ * It is not clear whether the waiter really took over ownership. The -+ * outermost callsite must make the final decision whether console -+ * ownership is needed for it to proceed. If yes, it must reacquire -+ * ownership (possibly hostile) before carefully proceeding. -+ * -+ * The calling context no longer owns the console so go back all the -+ * way instead of trying to implement reacquire heuristics in tons of -+ * places. -+ */ -+ return false; + any_progress |= nbcon_atomic_emit_one(&wctxt); + + local_irq_restore(irq_flags); +@@ -1166,6 +1186,9 @@ static inline bool uart_is_nbcon(struct uart_port *up) + * + * If @up is an nbcon console, this console will be acquired and marked as + * unsafe. Otherwise this function does nothing. ++ * ++ * nbcon consoles acquired via the port lock wrapper always use priority ++ * NBCON_PRIO_NORMAL. + */ + void nbcon_acquire(struct uart_port *up) + { +@@ -1200,6 +1223,9 @@ EXPORT_SYMBOL_GPL(nbcon_acquire); + * + * If @up is an nbcon console, the console will be marked as safe and + * released. Otherwise this function does nothing. ++ * ++ * nbcon consoles acquired via the port lock wrapper always use priority ++ * NBCON_PRIO_NORMAL. + */ + void nbcon_release(struct uart_port *up) + { +-- +2.51.0 + +From 7a91e2808e6bd5528591c60c1a8f5a62c16aad24 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 20 Oct 2023 10:03:42 +0000 +Subject: [PATCH 134/213] printk: nbcon: Add unsafe flushing on panic + +Add nbcon_atomic_flush_unsafe() to flush all nbcon consoles +using the write_atomic() callback and allowing unsafe hostile +takeovers. Call this at the end of panic() as a final attempt +to flush any pending messages. 
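+
+The resulting tail of panic() then performs two flush passes; roughly
+(a sketch of the ordering introduced below, no new API assumed):
+
+	/* Legacy consoles first, then a last-resort unsafe nbcon flush. */
+	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+	nbcon_atomic_flush_unsafe();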
+ +Note that legacy consoles use unsafe methods for flushing +from the beginning of panic (see bust_spinlocks()). Therefore, +systems using both legacy and nbcon consoles may still fail to +see panic messages due to unsafe legacy console usage. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/printk.h | 5 +++++ + kernel/panic.c | 1 + + kernel/printk/nbcon.c | 18 ++++++++++++++++-- + 3 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/include/linux/printk.h b/include/linux/printk.h +index cdb5e352ffb4..bd6c54e15cfb 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -199,6 +199,7 @@ extern asmlinkage void dump_stack(void) __cold; + void printk_trigger_flush(void); + extern void nbcon_acquire(struct uart_port *up); + extern void nbcon_release(struct uart_port *up); ++void nbcon_atomic_flush_unsafe(void); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -287,6 +288,10 @@ static inline void nbcon_release(struct uart_port *up) + { + } + ++static inline void nbcon_atomic_flush_unsafe(void) ++{ ++} ++ + #endif + + #ifdef CONFIG_SMP +diff --git a/kernel/panic.c b/kernel/panic.c +index d7973e975474..2d50347eb075 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -449,6 +449,7 @@ void panic(const char *fmt, ...) + * Explicitly flush the kernel log buffer one last time. + */ + console_flush_on_panic(CONSOLE_FLUSH_PENDING); ++ nbcon_atomic_flush_unsafe(); + + local_irq_enable(); + for (i = 0; ; i += PANIC_TIMER_STEP) { +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 3e999784ba5c..1c01f88d596d 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1030,8 +1030,9 @@ bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cook + * __nbcon_atomic_flush_all - Flush all nbcon consoles using their + * write_atomic() callback + * @stop_seq: Flush up until this record ++ * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers + */ +-static void __nbcon_atomic_flush_all(u64 stop_seq) ++static void __nbcon_atomic_flush_all(u64 stop_seq, bool allow_unsafe_takeover) + { + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); +@@ -1059,6 +1060,7 @@ static void __nbcon_atomic_flush_all(u64 stop_seq) + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->spinwait_max_us = 2000; ++ ctxt->allow_unsafe_takeover = allow_unsafe_takeover; + + /* + * Atomic flushing does not use console driver +@@ -1093,7 +1095,19 @@ static void __nbcon_atomic_flush_all(u64 stop_seq) + */ + void nbcon_atomic_flush_all(void) + { +- __nbcon_atomic_flush_all(prb_next_reserve_seq(prb)); ++ __nbcon_atomic_flush_all(prb_next_reserve_seq(prb), false); +} + +/** -+ * nbcon_can_proceed - Check whether ownership can proceed -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Return: True if this context still owns the console. False if -+ * ownership was handed over or taken. -+ * -+ * It is used in nbcon_enter_unsafe() to make sure that it still owns the -+ * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock -+ * for a higher priority context which asked for the friendly handover. -+ * -+ * It can be called inside an unsafe section when the console is just -+ * temporary in safe state instead of exiting and entering the unsafe state. -+ * -+ * Also it can be called in the safe context before doing an expensive safe -+ * operation. 
It does not make sense to do the operation when a higher -+ * priority context took the lock. ++ * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their ++ * write_atomic() callback and allowing unsafe hostile takeovers + * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. ++ * Flush the backlog up through the currently newest record. Unsafe hostile ++ * takeovers will be performed, if necessary. + */ -+bool nbcon_can_proceed(struct nbcon_write_context *wctxt) ++void nbcon_atomic_flush_unsafe(void) +{ -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ struct nbcon_state cur; ++ __nbcon_atomic_flush_all(prb_next_reserve_seq(prb), true); + } + + /** +-- +2.51.0 + +From 5d6e9e2d3e59bb9493c935f795f3ad6a66ed8de8 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 26 Sep 2023 12:44:07 +0000 +Subject: [PATCH 135/213] printk: Avoid console_lock dance if no legacy or boot + consoles + +Currently the console lock is used to attempt legacy-type +printing even if there are no legacy or boot consoles registered. +If no such consoles are registered, the console lock does not +need to be taken. + +Also, if boot consoles are registered, nbcon consoles must +perform their atomic printing under the console lock in order +to be synchronized with boot consoles. + +Add tracking of legacy console registration and use it with +boot console tracking to avoid unnecessary code paths, i.e. +do not use the console lock if there are no boot consoles +and no legacy consoles. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 12 ++++++++ + kernel/printk/printk.c | 59 ++++++++++++++++++++++++++++++---------- + 2 files changed, 56 insertions(+), 15 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index c4417fc48b7e..e2675981dfc5 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -44,6 +44,16 @@ enum printk_info_flags { + }; + + extern struct printk_ringbuffer *prb; ++extern bool have_legacy_console; ++extern bool have_boot_console; + -+ nbcon_state_read(con, &cur); ++/* ++ * Specifies if the console lock/unlock dance is needed for console ++ * printing. If @have_boot_console is true, the nbcon consoles will ++ * be printed serially along with the legacy consoles because nbcon ++ * consoles cannot print simultaneously with boot consoles. ++ */ ++#define printing_via_unlock (have_legacy_console || have_boot_console) + + __printf(4, 0) + int vprintk_store(int facility, int level, +@@ -122,6 +132,8 @@ static inline bool console_is_usable(struct console *con, short flags) + #define PRINTK_MESSAGE_MAX 0 + #define PRINTKRB_RECORD_MAX 0 + ++#define printing_via_unlock (false) + -+ return nbcon_context_can_proceed(ctxt, &cur); -+} -+EXPORT_SYMBOL_GPL(nbcon_can_proceed); + /* + * In !PRINTK builds we still export console_sem + * semaphore and some of console functions (console_unlock()/etc.), so +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 575b3425a86a..1152759ad682 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -463,6 +463,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. 
*/ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * Specifies if a legacy console is registered. If legacy consoles are ++ * present, it is necessary to perform the console_lock/console_unlock dance ++ * whenever console flushing should occur. ++ */ ++bool have_legacy_console; ++ + /* + * Specifies if a boot console is registered. If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by +@@ -2351,7 +2358,7 @@ asmlinkage int vprintk_emit(int facility, int level, + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + + /* If called from the scheduler, we can not call up(). */ +- if (!in_sched) { ++ if (!in_sched && printing_via_unlock) { + /* + * The caller may be holding system-critical or + * timing-sensitive locks. Disable preemption during +@@ -2652,7 +2659,7 @@ void resume_console(void) + */ + static int console_cpu_notify(unsigned int cpu) + { +- if (!cpuhp_tasks_frozen) { ++ if (!cpuhp_tasks_frozen && printing_via_unlock) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); +@@ -3205,7 +3212,8 @@ void console_flush_on_panic(enum con_flush_mode mode) + + nbcon_atomic_flush_all(); + +- console_flush_all(false, &next_seq, &handover); ++ if (printing_via_unlock) ++ console_flush_all(false, &next_seq, &handover); + } + + /* +@@ -3545,8 +3553,11 @@ void register_console(struct console *newcon) + newcon->dropped = 0; + console_init_seq(newcon, bootcon_registered); + +- if (newcon->flags & CON_NBCON) ++ if (newcon->flags & CON_NBCON) { + nbcon_init(newcon); ++ } else { ++ have_legacy_console = true; ++ } + + if (newcon->flags & CON_BOOT) + have_boot_console = true; +@@ -3603,6 +3614,7 @@ EXPORT_SYMBOL(register_console); + /* Must be called under console_list_lock(). */ + static int unregister_console_locked(struct console *console) + { ++ bool found_legacy_con = false; + bool found_boot_con = false; + struct console *c; + int res; +@@ -3659,9 +3671,13 @@ static int unregister_console_locked(struct console *console) + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; ++ if (!(c->flags & CON_NBCON)) ++ found_legacy_con = true; + } + if (!found_boot_con) + have_boot_console = false; ++ if (!found_legacy_con) ++ have_legacy_console = false; + + return res; + } +@@ -3813,6 +3829,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + u64 last_diff = 0; + u64 printk_seq; + short flags; ++ bool locked; + int cookie; + u64 diff; + u64 seq; +@@ -3822,22 +3839,28 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + seq = prb_next_reserve_seq(prb); + + /* Flush the consoles so that records up to @seq are printed. */ +- console_lock(); +- console_unlock(); ++ if (printing_via_unlock) { ++ console_lock(); ++ console_unlock(); ++ } + + for (;;) { + unsigned long begin_jiffies; + unsigned long slept_jiffies; + ++ locked = false; + diff = 0; + +- /* +- * Hold the console_lock to guarantee safe access to +- * console->seq. Releasing console_lock flushes more +- * records in case @seq is still not printed on all +- * usable consoles. +- */ +- console_lock(); ++ if (printing_via_unlock) { ++ /* ++ * Hold the console_lock to guarantee safe access to ++ * console->seq. Releasing console_lock flushes more ++ * records in case @seq is still not printed on all ++ * usable consoles. 
++ */ ++ console_lock(); ++ locked = true; ++ } + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { +@@ -3857,6 +3880,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(c); + } else { ++ WARN_ON_ONCE(!locked); + printk_seq = c->seq; + } + +@@ -3868,7 +3892,8 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + if (diff != last_diff && reset_on_progress) + remaining_jiffies = timeout_jiffies; + +- console_unlock(); ++ if (locked) ++ console_unlock(); + + /* Note: @diff is 0 if there are no usable consoles. */ + if (diff == 0 || remaining_jiffies == 0) +@@ -3990,7 +4015,11 @@ void defer_console_output(void) + * New messages may have been added directly to the ringbuffer + * using vprintk_store(), so wake any waiters as well. + */ +- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); ++ int val = PRINTK_PENDING_WAKEUP; ++ ++ if (printing_via_unlock) ++ val |= PRINTK_PENDING_OUTPUT; ++ __wake_up_klogd(val); + } + + void printk_trigger_flush(void) +-- +2.51.0 + +From 15303deb5fad0c13428dcf02025e9abc5a101344 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 11 Dec 2023 09:36:52 +0000 +Subject: [PATCH 136/213] printk: Track nbcon consoles + +Add a global flag @have_nbcon_console to identify if any nbcon +consoles are registered. This will be used in follow-up commits +to preserve legacy behavior when no nbcon consoles are registered. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 1152759ad682..de926ebe06e3 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -470,6 +470,13 @@ static DEFINE_MUTEX(syslog_lock); + */ + bool have_legacy_console; + ++/* ++ * Specifies if an nbcon console is registered. If nbcon consoles are present, ++ * synchronous printing of legacy consoles will not occur during panic until ++ * the backtrace has been stored to the ringbuffer. ++ */ ++bool have_nbcon_console; ++ + /* + * Specifies if a boot console is registered. 
If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by +@@ -3554,6 +3561,7 @@ void register_console(struct console *newcon) + console_init_seq(newcon, bootcon_registered); + + if (newcon->flags & CON_NBCON) { ++ have_nbcon_console = true; + nbcon_init(newcon); + } else { + have_legacy_console = true; +@@ -3615,6 +3623,7 @@ EXPORT_SYMBOL(register_console); + static int unregister_console_locked(struct console *console) + { + bool found_legacy_con = false; ++ bool found_nbcon_con = false; + bool found_boot_con = false; + struct console *c; + int res; +@@ -3671,13 +3680,18 @@ static int unregister_console_locked(struct console *console) + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; +- if (!(c->flags & CON_NBCON)) ++ ++ if (c->flags & CON_NBCON) ++ found_nbcon_con = true; ++ else + found_legacy_con = true; + } + if (!found_boot_con) + have_boot_console = false; + if (!found_legacy_con) + have_legacy_console = false; ++ if (!found_nbcon_con) ++ have_nbcon_console = false; + + return res; + } +-- +2.51.0 + +From 90a42cb3d79377af33035a4b5a8d887441a4844f Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 22 Nov 2023 11:56:58 +0000 +Subject: [PATCH 137/213] printk: Coordinate direct printing in panic + +Perform printing by nbcon consoles on the panic CPU from the +printk() caller context in order to get panic messages printed +as soon as possible. + +If legacy and nbcon consoles are registered, the legacy consoles +will no longer perform direct printing on the panic CPU until +after the backtrace has been stored. This will give the safe +nbcon consoles a chance to print the panic messages before +allowing the unsafe legacy consoles to print. + +If no nbcon consoles are registered, there is no change in +behavior (i.e. legacy consoles will always attempt to print +from the printk() caller context). + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/printk.h | 2 ++ + kernel/panic.c | 2 ++ + kernel/printk/printk.c | 53 ++++++++++++++++++++++++++++++++++++------ + 3 files changed, 50 insertions(+), 7 deletions(-) + +diff --git a/include/linux/printk.h b/include/linux/printk.h +index bd6c54e15cfb..aeab9888e4ee 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -782,3 +782,5 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, + print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) + + #endif ++ ++void printk_legacy_allow_panic_sync(void); +diff --git a/kernel/panic.c b/kernel/panic.c +index 2d50347eb075..f087bfd4c521 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -366,6 +366,8 @@ void panic(const char *fmt, ...) 
+ */ + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + ++ printk_legacy_allow_panic_sync(); + -+#define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) -+#define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) + panic_print_sys_info(false); + + kmsg_dump(KMSG_DUMP_PANIC); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index de926ebe06e3..11a83a1651c4 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2336,12 +2336,23 @@ int vprintk_store(int facility, int level, + return ret; + } + ++static bool legacy_allow_panic_sync; + -+/** -+ * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state -+ * @ctxt: The nbcon context from nbcon_context_try_acquire() -+ * @unsafe: The new value for the unsafe bit -+ * -+ * Return: True if the unsafe state was updated and this context still -+ * owns the console. Otherwise false if ownership was handed -+ * over or taken. -+ * -+ * This function allows console owners to modify the unsafe status of the -+ * console. -+ * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. -+ * -+ * Internal helper to avoid duplicated code. ++/* ++ * This acts as a one-way switch to allow legacy consoles to print from ++ * the printk() caller context on a panic CPU. + */ -+static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) ++void printk_legacy_allow_panic_sync(void) +{ -+ struct console *con = ctxt->console; -+ struct nbcon_state cur; -+ struct nbcon_state new; ++ legacy_allow_panic_sync = true; ++} + -+ nbcon_state_read(con, &cur); + asmlinkage int vprintk_emit(int facility, int level, + const struct dev_printk_info *dev_info, + const char *fmt, va_list args) + { ++ bool do_trylock_unlock = printing_via_unlock; + int printed_len; +- bool in_sched = false; + + /* Suppress unimportant messages after panic happens */ + if (unlikely(suppress_printk)) +@@ -2357,15 +2368,43 @@ asmlinkage int vprintk_emit(int facility, int level, + + if (level == LOGLEVEL_SCHED) { + level = LOGLEVEL_DEFAULT; +- in_sched = true; ++ /* If called from the scheduler, we can not call up(). */ ++ do_trylock_unlock = false; + } + + printk_delay(level); + + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + +- /* If called from the scheduler, we can not call up(). */ +- if (!in_sched && printing_via_unlock) { ++ if (!have_boot_console && have_nbcon_console) { ++ bool is_panic_context = this_cpu_in_panic(); + -+ do { + /* -+ * The unsafe bit must not be cleared if an -+ * unsafe hostile takeover has occurred. ++ * In panic, the legacy consoles are not allowed to print from ++ * the printk calling context unless explicitly allowed. This ++ * gives the safe nbcon consoles a chance to print out all the ++ * panic messages first. This restriction only applies if ++ * there are nbcon consoles registered. 
+	 */
++		if (is_panic_context)
++			do_trylock_unlock &= legacy_allow_panic_sync;
+
++		/*
++		 * There are situations where nbcon atomic printing should
++		 * happen in the printk() caller context:
++		 *
++		 * - When this CPU is in panic.
++		 *
++		 * Note that if boot consoles are registered, the
++		 * console_lock/console_unlock dance must be relied upon
++		 * instead because nbcon consoles cannot print simultaneously
++		 * with boot consoles.
++		 */
++		if (is_panic_context)
++			nbcon_atomic_flush_all();
++	}
+
++	if (do_trylock_unlock) {
+ 		/*
+ 		 * The caller may be holding system-critical or
+ 		 * timing-sensitive locks. Disable preemption during
+@@ -2385,10 +2424,10 @@ asmlinkage int vprintk_emit(int facility, int level,
+ 		preempt_enable();
+ 	}
+ 
+-	if (in_sched)
+-		defer_console_output();
+-	else
++	if (do_trylock_unlock)
+ 		wake_up_klogd();
++	else
++		defer_console_output();
+ 
+ 	return printed_len;
+ }
+-- 
+2.51.0
+
+From 07d46507a4ddafcb3b580f5abe67f4ed506a90b4 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 11 Sep 2023 15:21:57 +0000
+Subject: [PATCH 138/213] printk: nbcon: Implement emergency sections
+
+In emergency situations (something has gone wrong but the
+system continues to operate), usually important information
+(such as a backtrace) is generated via printk(). Each
+individual printk record has little meaning. It is the
+collection of printk messages that is most often needed by
+developers and users.
+
+In order to help ensure that the printk messages of an emergency
+situation are all stored to the ringbuffer as quickly as
+possible, disable console output for that CPU while it is in
+the emergency situation. When exiting the emergency situation,
+trigger the consoles to be flushed.
+
+Add per-CPU emergency nesting tracking because an emergency
+can arise while in an emergency situation.
+
+Add functions to mark the beginning and end of emergency
+sections where the urgent messages are generated.
+
+Do not print if the current CPU is in an emergency state.
+
+Trigger console flushing when exiting all emergency nesting.
+
+Note that the emergency state is not system-wide. While one CPU
+is in an emergency state, another CPU may continue to print
+console messages.
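+
+For illustration, the usage pattern (applied by later patches to the
+warn, oops, RCU stall and lockdep report paths; the dump_stack() call
+is only a stand-in here):
+
+	nbcon_cpu_emergency_enter();
+	/* printk() output here is only stored, not printed... */
+	dump_stack();
+	nbcon_cpu_emergency_exit();	/* ...and flushed after this */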
+ +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 4 ++ + include/linux/printk.h | 7 +++- + kernel/printk/nbcon.c | 81 +++++++++++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 25 ++++++++++--- + 4 files changed, 109 insertions(+), 8 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 4c2bebe734bf..37300971f0ec 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -463,10 +463,14 @@ static inline bool console_is_registered(const struct console *con) + hlist_for_each_entry(con, &console_list, node) + + #ifdef CONFIG_PRINTK ++extern void nbcon_cpu_emergency_enter(void); ++extern void nbcon_cpu_emergency_exit(void); + extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); + extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); + extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); + #else ++static inline void nbcon_cpu_emergency_enter(void) { } ++static inline void nbcon_cpu_emergency_exit(void) { } + static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } + static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } + static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +diff --git a/include/linux/printk.h b/include/linux/printk.h +index aeab9888e4ee..ebebc32e78de 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -197,6 +197,7 @@ void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; + extern asmlinkage void dump_stack(void) __cold; + void printk_trigger_flush(void); ++void printk_legacy_allow_panic_sync(void); + extern void nbcon_acquire(struct uart_port *up); + extern void nbcon_release(struct uart_port *up); + void nbcon_atomic_flush_unsafe(void); +@@ -280,6 +281,10 @@ static inline void printk_trigger_flush(void) + { + } + ++static inline void printk_legacy_allow_panic_sync(void) ++{ +} ++ + static inline void nbcon_acquire(struct uart_port *up) + { + } +@@ -782,5 +787,3 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, + print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) + + #endif +- +-void printk_legacy_allow_panic_sync(void); +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 1c01f88d596d..c3ee245397f6 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -929,6 +929,29 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + return nbcon_context_exit_unsafe(ctxt); + } + ++/* Track the nbcon emergency nesting per CPU. */ ++static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); ++static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; + +/** -+ * nbcon_enter_unsafe - Enter an unsafe region in the driver -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Return: True if this context still owns the console. False if -+ * ownership was handed over or taken. ++ * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer + * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. 
++ * Return: Either a pointer to the per CPU emergency nesting counter of ++ * the current CPU or to the init data during early boot. + */ -+bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) ++static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) +{ -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ /* ++ * The value of __printk_percpu_data_ready gets set in normal ++ * context and before SMP initialization. As a result it could ++ * never change while inside an nbcon emergency section. ++ */ ++ if (!printk_percpu_data_ready()) ++ return &early_nbcon_pcpu_emergency_nesting; + -+ return nbcon_context_enter_unsafe(ctxt); ++ return this_cpu_ptr(&nbcon_pcpu_emergency_nesting); +} -+EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); + -+/** -+ * nbcon_exit_unsafe - Exit an unsafe region in the driver -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Return: True if this context still owns the console. False if -+ * ownership was handed over or taken. -+ * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. -+ */ -+bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) -+{ -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + /** + * nbcon_atomic_emit_one - Print one record for an nbcon console using the + * write_atomic() callback +@@ -971,9 +994,15 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) + */ + enum nbcon_prio nbcon_get_default_prio(void) + { ++ unsigned int *cpu_emergency_nesting; + -+ return nbcon_context_exit_unsafe(ctxt); -+} -+EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + if (this_cpu_in_panic()) + return NBCON_PRIO_PANIC; + ++ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); ++ if (*cpu_emergency_nesting) ++ return NBCON_PRIO_EMERGENCY; + + return NBCON_PRIO_NORMAL; + } + +@@ -1110,6 +1139,58 @@ void nbcon_atomic_flush_unsafe(void) + __nbcon_atomic_flush_all(prb_next_reserve_seq(prb), true); + } + +/** -+ * nbcon_reacquire - Reacquire a console after losing ownership -+ * @wctxt: The write context that was handed to the write function ++ * nbcon_cpu_emergency_enter - Enter an emergency section where printk() ++ * messages for that CPU are only stored + * -+ * Since ownership can be lost at any time due to handover or takeover, a -+ * printing context _should_ be prepared to back out immediately and -+ * carefully. However, there are many scenarios where the context _must_ -+ * reacquire ownership in order to finalize or revert hardware changes. ++ * Upon exiting the emergency section, all stored messages are flushed. + * -+ * This function allows a context to reacquire ownership using the same -+ * priority as its previous ownership. ++ * Context: Any context. Disables preemption. + * -+ * Note that for printing contexts, after a successful reacquire the -+ * context will have no output buffer because that has been lost. This -+ * function cannot be used to resume printing. ++ * When within an emergency section, no printing occurs on that CPU. This ++ * is to allow all emergency messages to be dumped into the ringbuffer before ++ * flushing the ringbuffer. The actual printing occurs when exiting the ++ * outermost emergency section. 
+ */ -+void nbcon_reacquire(struct nbcon_write_context *wctxt) ++void nbcon_cpu_emergency_enter(void) +{ -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ struct nbcon_state cur; ++ unsigned int *cpu_emergency_nesting; + -+ while (!nbcon_context_try_acquire(ctxt)) -+ cpu_relax(); ++ preempt_disable(); + -+ wctxt->outbuf = NULL; -+ wctxt->len = 0; -+ nbcon_state_read(con, &cur); -+ wctxt->unsafe_takeover = cur.unsafe_takeover; ++ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); ++ (*cpu_emergency_nesting)++; +} -+EXPORT_SYMBOL_GPL(nbcon_reacquire); + +/** -+ * nbcon_emit_next_record - Emit a record in the acquired context -+ * @wctxt: The write context that will be handed to the write function -+ * @use_atomic: True if the write_atomic callback is to be used -+ * -+ * Return: True if this context still owns the console. False if -+ * ownership was handed over or taken. -+ * -+ * When this function returns false then the calling context no longer owns -+ * the console and is no longer allowed to go forward. In this case it must -+ * back out immediately and carefully. The buffer content is also no longer -+ * trusted since it no longer belongs to the calling context. If the caller -+ * wants to do more it must reacquire the console first. ++ * nbcon_cpu_emergency_exit - Exit an emergency section and flush the ++ * stored messages + * -+ * When true is returned, @wctxt->ctxt.backlog indicates whether there are -+ * still records pending in the ringbuffer, ++ * Flushing only occurs when exiting all nesting for the CPU. ++ * ++ * Context: Any context. Enables preemption. + */ -+static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) ++void nbcon_cpu_emergency_exit(void) +{ -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; -+ struct printk_message pmsg = { -+ .pbufs = ctxt->pbufs, -+ }; -+ unsigned long con_dropped; -+ struct nbcon_state cur; -+ unsigned long dropped; -+ bool done = false; ++ unsigned int *cpu_emergency_nesting; ++ bool do_trigger_flush = false; + -+ /* -+ * The printk buffers are filled within an unsafe section. This -+ * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from -+ * clobbering each other. -+ */ ++ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + -+ if (!nbcon_context_enter_unsafe(ctxt)) -+ return false; ++ WARN_ON_ONCE(*cpu_emergency_nesting == 0); + -+ ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); -+ if (!ctxt->backlog) -+ return nbcon_context_exit_unsafe(ctxt); ++ if (*cpu_emergency_nesting == 1) ++ do_trigger_flush = true; + -+ /* -+ * @con->dropped is not protected in case of an unsafe hostile -+ * takeover. In that situation the update can be racy so -+ * annotate it accordingly. -+ */ -+ con_dropped = data_race(READ_ONCE(con->dropped)); ++ /* Undo the nesting count of nbcon_cpu_emergency_enter(). */ ++ (*cpu_emergency_nesting)--; + -+ dropped = con_dropped + pmsg.dropped; -+ if (dropped && !is_extended) -+ console_prepend_dropped(&pmsg, dropped); ++ preempt_enable(); + -+ if (!nbcon_context_exit_unsafe(ctxt)) -+ return false; ++ if (do_trigger_flush) ++ printk_trigger_flush(); ++} + -+ /* For skipped records just update seq/dropped in @con. 
*/ -+ if (pmsg.outbuf_len == 0) -+ goto update_con; + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 11a83a1651c4..86068a23386a 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2411,16 +2411,29 @@ asmlinkage int vprintk_emit(int facility, int level, + * printing of all remaining records to all consoles so that + * this context can return as soon as possible. Hopefully + * another printk() caller will take over the printing. ++ * ++ * Also, nbcon_get_default_prio() requires migration disabled. + */ + preempt_disable(); + -+ /* Initialize the write context for driver callbacks. */ -+ wctxt->outbuf = &pmsg.pbufs->outbuf[0]; -+ wctxt->len = pmsg.outbuf_len; -+ nbcon_state_read(con, &cur); -+ wctxt->unsafe_takeover = cur.unsafe_takeover; + /* +- * Try to acquire and then immediately release the console +- * semaphore. The release will print out buffers. With the +- * spinning variant, this context tries to take over the +- * printing from another printing context. ++ * Do not emit for EMERGENCY priority. The console will be ++ * explicitly flushed when exiting the emergency section. + */ +- if (console_trylock_spinning()) +- console_unlock(); ++ if (nbcon_get_default_prio() == NBCON_PRIO_EMERGENCY) { ++ do_trylock_unlock = false; ++ } else { ++ /* ++ * Try to acquire and then immediately release the ++ * console semaphore. The release will print out ++ * buffers. With the spinning variant, this context ++ * tries to take over the printing from another ++ * printing context. ++ */ ++ if (console_trylock_spinning()) ++ console_unlock(); ++ } ++ + preempt_enable(); + } + +-- +2.51.0 + +From f8f6719e16feb46b0d4e1fbddad81b707f12da56 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 11 Sep 2023 15:53:04 +0000 +Subject: [PATCH 139/213] panic: Mark emergency section in warn + +Mark the full contents of __warn() as an emergency section. In +this section, the CPU will not perform console output for the +printk() calls. Instead, a flushing of the console output will +triggered when exiting the emergency section. + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/panic.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/kernel/panic.c b/kernel/panic.c +index f087bfd4c521..f4d3527dfb4d 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -663,6 +663,8 @@ struct warn_args { + void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args) + { ++ nbcon_cpu_emergency_enter(); ++ + disable_trace_on_warning(); + + if (file) +@@ -693,6 +695,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); ++ ++ nbcon_cpu_emergency_exit(); + } + + #ifdef CONFIG_BUG +-- +2.51.0 + +From 96d0ce1f816b12c20649d92e839ffe13b0fb2865 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 19 Sep 2023 17:07:34 +0000 +Subject: [PATCH 140/213] panic: Mark emergency section in oops + +Mark an emergency section beginning with oops_enter() until the +end of oops_exit(). In this section, the CPU will not perform +console output for the printk() calls. Instead, a flushing of the +console output will triggered when exiting the emergency section. + +The very end of oops_exit() performs a kmsg_dump(). 
This is not
+included in the emergency section because it is another
+flushing mechanism that should occur after the consoles have
+been triggered to flush.
+
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/panic.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/kernel/panic.c b/kernel/panic.c
+index f4d3527dfb4d..762e2fc4d478 100644
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -630,6 +630,7 @@ bool oops_may_print(void)
+  */
+ void oops_enter(void)
+ {
++	nbcon_cpu_emergency_enter();
+ 	tracing_off();
+ 	/* can't trust the integrity of the kernel anymore: */
+ 	debug_locks_off();
+@@ -652,6 +653,7 @@ void oops_exit(void)
+ {
+ 	do_oops_enter_exit();
+ 	print_oops_end_marker();
++	nbcon_cpu_emergency_exit();
+ 	kmsg_dump(KMSG_DUMP_OOPS);
+ }
+ 
+-- 
+2.51.0
+
+From 011c650fb21a5ebd0add0675bfa408d6439fb737 Mon Sep 17 00:00:00 2001
+From: John Ogness <john.ogness@linutronix.de>
+Date: Mon, 11 Sep 2023 15:53:39 +0000
+Subject: [PATCH 141/213] rcu: Mark emergency section in rcu stalls
+
+Mark an emergency section within print_other_cpu_stall(), where
+RCU stall information is printed. In this section, the CPU will
+not perform console output for the printk() calls. Instead, a
+flushing of the console output will be triggered when exiting
+the emergency section.
+
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/rcu/tree_stall.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
+index aab91040b83b..cfbbfc09bbf7 100644
+--- a/kernel/rcu/tree_stall.h
++++ b/kernel/rcu/tree_stall.h
+@@ -8,6 +8,7 @@
+  */
+ 
+ #include <linux/kvm_para.h>
++#include <linux/console.h>
+ 
+ //////////////////////////////////////////////////////////////////////////////
+ //
+@@ -604,6 +605,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
+ 	if (rcu_stall_is_suppressed())
+ 		return;
+ 
++	nbcon_cpu_emergency_enter();
++
+ 	/*
+ 	 * OK, time to rat on our buddy...
+ 	 * See Documentation/RCU/stallwarn.rst for info on how to debug
+@@ -658,6 +661,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
+ 	panic_on_rcu_stall();
+ 
+ 	rcu_force_quiescent_state();  /* Kick them all. */
++
++	nbcon_cpu_emergency_exit();
+ }
+ 
+ static void print_cpu_stall(unsigned long gps)
+-- 
+2.51.0
+
+From 917a6d693fb79a64e8b8f39ed007a0d2ac2c7a5b Mon Sep 17 00:00:00 2001
+From: John Ogness <john.ogness@linutronix.de>
+Date: Mon, 18 Sep 2023 20:27:41 +0000
+Subject: [PATCH 142/213] lockdep: Mark emergency section in lockdep splats
+
+Mark an emergency section within print_usage_bug(), where
+lockdep bugs are printed. In this section, the CPU will not
+perform console output for the printk() calls. Instead, a
+flushing of the console output will be triggered when exiting
+the emergency section.
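+
+Because the per-CPU count nests, a splat arriving while this CPU is
+already inside an emergency section is handled naturally: only the
+outermost exit triggers the flush. Illustration (not part of this
+patch):
+
+	nbcon_cpu_emergency_enter();	/* nesting 0 -> 1 */
+	nbcon_cpu_emergency_enter();	/* nested report, 1 -> 2 */
+	nbcon_cpu_emergency_exit();	/* 2 -> 1, no flush yet */
+	nbcon_cpu_emergency_exit();	/* 1 -> 0, flush is triggered */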
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/lockdep.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index 9419a79e8833..897b03afb7d1 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + +@@ -3971,6 +3972,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, + if (!debug_locks_off() || debug_locks_silent) + return; + ++ nbcon_cpu_emergency_enter(); ++ + pr_warn("\n"); + pr_warn("================================\n"); + pr_warn("WARNING: inconsistent lock state\n"); +@@ -3999,6 +4002,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, + + pr_warn("\nstack backtrace:\n"); + dump_stack(); ++ ++ nbcon_cpu_emergency_exit(); + } + + /* +-- +2.51.0 + +From 4ad60f916a2f1604e7e6164db3e6b02ed5e474b5 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 22 Sep 2023 14:12:21 +0000 +Subject: [PATCH 143/213] printk: nbcon: Introduce printing kthreads + +Provide the main implementation for running a printer kthread +per nbcon console that is takeover/handover aware. + +The main print function nbcon_emit_next_record() will generate +a warning if a task other than the dedicated printer thread +tries to print using write_thread(). + +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 8 ++ + kernel/printk/internal.h | 25 ++++++ + kernel/printk/nbcon.c | 188 ++++++++++++++++++++++++++++++++++++++- + kernel/printk/printk.c | 31 +++++++ + 4 files changed, 249 insertions(+), 3 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 37300971f0ec..83ffc60e61b1 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + + struct vc_data; +@@ -301,12 +302,15 @@ struct nbcon_write_context { + * @node: hlist node for the console list + * + * @write_atomic: Write callback for atomic context ++ * @write_thread: Write callback for non-atomic context + * @driver_enter: Callback to begin synchronization with driver code + * @driver_exit: Callback to finish synchronization with driver code + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer + * @locked_port: True, if the port lock is locked by nbcon ++ * @kthread: Printer kthread for this console ++ * @rcuwait: RCU-safe wait object for @kthread waking + */ + struct console { + char name[16]; +@@ -330,12 +334,16 @@ struct console { + /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); ++ bool (*write_thread)(struct console *con, ++ struct nbcon_write_context *wctxt); + void (*driver_enter)(struct console *con, unsigned long *flags); + void (*driver_exit)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; + bool locked_port; ++ struct task_struct *kthread; ++ struct rcuwait rcuwait; + }; + + #ifdef CONFIG_LOCKDEP +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index e2675981dfc5..4de36691009b 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -92,6 +92,7 @@ void nbcon_free(struct console 
*con); + enum nbcon_prio nbcon_get_default_prio(void); + void nbcon_atomic_flush_all(void); + bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); ++void nbcon_kthread_create(struct console *con); + + /* + * Check if the given console is currently capable and allowed to print +@@ -110,6 +111,8 @@ static inline bool console_is_usable(struct console *con, short flags) + if (flags & CON_NBCON) { + if (!con->write_atomic) + return false; ++ if (!con->write_thread || !con->kthread) ++ return false; + } else { + if (!con->write) + return false; +@@ -126,12 +129,34 @@ static inline bool console_is_usable(struct console *con, short flags) + return true; + } + ++/** ++ * nbcon_kthread_wake - Wake up a printk thread ++ * @con: Console to operate on ++ */ ++static inline void nbcon_kthread_wake(struct console *con) ++{ ++ /* ++ * Guarantee any new records can be seen by tasks preparing to wait ++ * before this context checks if the rcuwait is empty. ++ * ++ * The full memory barrier in rcuwait_wake_up() pairs with the full ++ * memory barrier within set_current_state() of ++ * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() ++ * adds the waiter but before it has checked the wait condition. ++ * ++ * This pairs with nbcon_kthread_func:A. ++ */ ++ rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ ++} + + #else + + #define PRINTK_PREFIX_MAX 0 + #define PRINTK_MESSAGE_MAX 0 + #define PRINTKRB_RECORD_MAX 0 + ++static inline void nbcon_kthread_wake(struct console *con) { } ++static inline void nbcon_kthread_create(struct console *con) { } + #define printing_via_unlock (false) + + /* +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index c3ee245397f6..1becdfc7772c 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -5,8 +5,10 @@ + #include + #include + #include ++#include + #include + #include ++#include "printk_ringbuffer.h" + #include "internal.h" + /* + * Printk console printing implementation for consoles which does not depend +@@ -828,6 +830,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + /** + * nbcon_emit_next_record - Emit a record in the acquired context + * @wctxt: The write context that will be handed to the write function ++ * @use_atomic: True if the write_atomic callback is to be used + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. +@@ -841,7 +844,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + * When true is returned, @wctxt->ctxt.backlog indicates whether there are + * still records pending in the ringbuffer, + */ +-static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) ++static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) + { + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; +@@ -891,9 +894,17 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; + +- if (con->write_atomic) + if (use_atomic && + con->write_atomic) { -+ done = con->write_atomic(con, wctxt); -+ + done = con->write_atomic(con, wctxt); + + } else if (!use_atomic && + con->write_thread && + con->kthread) { @@ -11760,41 +19397,13 @@ index 000000000..b4278854e + done = con->write_thread(con, wctxt); + } + -+ if (!done) { -+ /* -+ * The emit was aborted, probably due to a loss of ownership. -+ * Ensure ownership was lost or released before reporting the -+ * loss. 
-+ */ -+ nbcon_context_release(ctxt); -+ return false; -+ } -+ -+ /* -+ * Since any dropped message was successfully output, reset the -+ * dropped count for the console. -+ */ -+ dropped = 0; -+update_con: -+ /* -+ * The dropped count and the sequence number are updated within an -+ * unsafe section. This limits update races to the panic context and -+ * allows the panic context to win. -+ */ -+ -+ if (!nbcon_context_enter_unsafe(ctxt)) -+ return false; -+ -+ if (dropped != con_dropped) { -+ /* Counterpart to the READ_ONCE() above. */ -+ WRITE_ONCE(con->dropped, dropped); -+ } -+ -+ nbcon_seq_try_update(ctxt, pmsg.seq + 1); -+ -+ return nbcon_context_exit_unsafe(ctxt); -+} -+ + if (!done) { + /* + * The emit was aborted, probably due to a loss of ownership. +@@ -929,6 +940,118 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) + return nbcon_context_exit_unsafe(ctxt); + } + +/** + * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup + * @con: Console to operate on @@ -11818,7 +19427,7 @@ index 000000000..b4278854e + + cookie = console_srcu_read_lock(); + flags = console_srcu_read_flags(con); -+ is_usable = console_is_usable(con, flags, false); ++ is_usable = console_is_usable(con, flags); + console_srcu_read_unlock(cookie); + + if (!is_usable) @@ -11877,7 +19486,7 @@ index 000000000..b4278854e + + con_flags = console_srcu_read_flags(con); + -+ if (console_is_usable(con, con_flags, false)) { ++ if (console_is_usable(con, con_flags)) { + con->driver_enter(con, &flags); + + /* @@ -11896,18 +19505,418 @@ index 000000000..b4278854e + backlog = ctxt->backlog; + } + } -+ -+ con->driver_exit(con, flags); -+ } -+ -+ console_srcu_read_unlock(cookie); -+ cond_resched(); -+ -+ } while (backlog); -+ -+ goto wait_for_event; -+} -+ ++ ++ con->driver_exit(con, flags); ++ } ++ ++ console_srcu_read_unlock(cookie); ++ ++ } while (backlog); ++ ++ goto wait_for_event; ++} ++ + /* Track the nbcon emergency nesting per CPU. */ + static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); + static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; +@@ -976,7 +1099,7 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) + * handed over or taken over. In both cases the context is no + * longer valid. + */ +- if (!nbcon_emit_next_record(wctxt)) ++ if (!nbcon_emit_next_record(wctxt, true)) + return false; + + nbcon_context_release(ctxt); +@@ -1191,6 +1314,63 @@ void nbcon_cpu_emergency_exit(void) + printk_trigger_flush(); + } + ++/** ++ * nbcon_kthread_stop - Stop a printer thread ++ * @con: Console to operate on ++ */ ++static void nbcon_kthread_stop(struct console *con) ++{ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!con->kthread) ++ return; ++ ++ kthread_stop(con->kthread); ++ con->kthread = NULL; ++} ++ ++/** ++ * nbcon_kthread_create - Create a printer thread ++ * @con: Console to operate on ++ * ++ * If it fails, let the console proceed. The atomic part might ++ * be usable and useful. ++ */ ++void nbcon_kthread_create(struct console *con) ++{ ++ struct task_struct *kt; ++ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!(con->flags & CON_NBCON) || !con->write_thread) ++ return; ++ ++ if (con->kthread) ++ return; ++ ++ /* ++ * Printer threads cannot be started as long as any boot console is ++ * registered because there is no way to synchronize the hardware ++ * registers between boot console code and regular console code. 
++ */ ++ if (have_boot_console) ++ return; ++ ++ kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); ++ if (IS_ERR(kt)) { ++ con_printk(KERN_ERR, con, "failed to start printing thread\n"); ++ return; ++ } ++ ++ con->kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(con->kthread, -20); ++} ++ + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +@@ -1237,6 +1417,7 @@ void nbcon_init(struct console *con) + /* nbcon_alloc() must have been called and successful! */ + BUG_ON(!con->pbufs); + ++ rcuwait_init(&con->rcuwait); + nbcon_seq_force(con, con->seq); + nbcon_state_set(con, &state); + } +@@ -1249,6 +1430,7 @@ void nbcon_free(struct console *con) + { + struct nbcon_state state = { }; + ++ nbcon_kthread_stop(con); + nbcon_state_set(con, &state); + + /* Boot consoles share global printk buffers. */ +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 86068a23386a..8e2fd0d05667 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2688,6 +2688,8 @@ void suspend_console(void) + void resume_console(void) + { + struct console *con; ++ short flags; ++ int cookie; + + if (!console_suspend_enabled) + return; +@@ -2704,6 +2706,14 @@ void resume_console(void) + */ + synchronize_srcu(&console_srcu); + ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ flags = console_srcu_read_flags(con); ++ if (flags & CON_NBCON) ++ nbcon_kthread_wake(con); ++ } ++ console_srcu_read_unlock(cookie); ++ + pr_flush(1000, true); + } + +@@ -3024,6 +3034,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + u64 printk_seq; + bool progress; + ++ /* ++ * console_flush_all() is only for legacy consoles, ++ * unless the nbcon console has no kthread printer. ++ */ ++ if ((flags & CON_NBCON) && con->kthread) ++ continue; ++ + if (!console_is_usable(con, flags)) + continue; + any_usable = true; +@@ -3329,9 +3346,23 @@ EXPORT_SYMBOL(console_stop); + + void console_start(struct console *console) + { ++ short flags; ++ + console_list_lock(); + console_srcu_write_flags(console, console->flags | CON_ENABLED); ++ flags = console->flags; + console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. The related ++ * printing context must be able to see it is enabled so that ++ * it is guaranteed to wake up and resume printing. ++ */ ++ synchronize_srcu(&console_srcu); ++ ++ if (flags & CON_NBCON) ++ nbcon_kthread_wake(console); ++ + __pr_flush(console, 1000, true); + } + EXPORT_SYMBOL(console_start); +-- +2.51.0 + +From 7541769b97753238fd5169e0a9a7ad246915b783 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 23 Oct 2023 17:43:48 +0000 +Subject: [PATCH 144/213] printk: Atomic print in printk context on shutdown + +For nbcon consoles, normally the printing is handled by the +dedicated console printing threads. However, on shutdown the +printing threads may not get a chance to print the final +messages. + +When shutting down or rebooting (system_state > SYSTEM_RUNNING), +perform atomic printing from the printk() caller context. 
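To make the new gate concrete, here is a compilable user-space model of the decision (the enum mirrors the kernel's enum system_states ordering; the helper name is made up for illustration):

  #include <stdbool.h>

  /* Same relative ordering as the kernel's enum system_states. */
  enum system_states {
  	SYSTEM_BOOTING,
  	SYSTEM_SCHEDULING,
  	SYSTEM_FREEING_INITMEM,
  	SYSTEM_RUNNING,
  	SYSTEM_HALT,
  	SYSTEM_POWER_OFF,
  	SYSTEM_RESTART,
  	SYSTEM_SUSPEND,
  };

  static enum system_states system_state = SYSTEM_RUNNING;

  /* True when printk() must flush nbcon consoles itself. */
  static bool want_atomic_flush(bool is_panic_context)
  {
  	if (is_panic_context)
  		return true;

  	/* HALT, POWER_OFF, RESTART and SUSPEND all sort above RUNNING. */
  	return system_state > SYSTEM_RUNNING;
  }

Because the comparison is purely ordinal, every state past SYSTEM_RUNNING takes the direct atomic path, which is exactly the set of states in which the printing threads may no longer be scheduled.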
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 8e2fd0d05667..904cff9b359d 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2395,13 +2395,18 @@ asmlinkage int vprintk_emit(int facility, int level, + * + * - When this CPU is in panic. + * ++ * - During shutdown, since the printing threads may not get ++ * a chance to print the final messages. ++ * + * Note that if boot consoles are registered, the + * console_lock/console_unlock dance must be relied upon + * instead because nbcon consoles cannot print simultaneously + * with boot consoles. + */ +- if (is_panic_context) ++ if (is_panic_context || ++ (system_state > SYSTEM_RUNNING)) { + nbcon_atomic_flush_all(); ++ } + } + + if (do_trylock_unlock) { +-- +2.51.0 + +From 74210bc92b3dca6b60f7006b467cba51b9100cb0 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 26 Sep 2023 14:43:30 +0000 +Subject: [PATCH 145/213] printk: nbcon: Add context to console_is_usable() + +The nbcon consoles have two callbacks to be used for different +contexts. In order to determine if an nbcon console is usable, +console_is_usable() needs to know if it is a context that will +use the write_atomic() callback or the write_thread() callback. + +Add an extra parameter @use_atomic to specify this. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 16 ++++++++++------ + kernel/printk/nbcon.c | 6 +++--- + kernel/printk/printk.c | 6 ++++-- + 3 files changed, 17 insertions(+), 11 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 4de36691009b..e5eb7dc25e0a 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -100,7 +100,7 @@ void nbcon_kthread_create(struct console *con); + * which can also play a role in deciding if @con can be used to print + * records. 
+ */ +-static inline bool console_is_usable(struct console *con, short flags) ++static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) + { + if (!(flags & CON_ENABLED)) + return false; +@@ -109,10 +109,13 @@ static inline bool console_is_usable(struct console *con, short flags) + return false; + + if (flags & CON_NBCON) { +- if (!con->write_atomic) +- return false; +- if (!con->write_thread || !con->kthread) +- return false; ++ if (use_atomic) { ++ if (!con->write_atomic) ++ return false; ++ } else { ++ if (!con->write_thread || !con->kthread) ++ return false; ++ } + } else { + if (!con->write) + return false; +@@ -178,7 +181,8 @@ static inline void nbcon_atomic_flush_all(void) { } + static inline bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, + int cookie) { return false; } + +-static inline bool console_is_usable(struct console *con, short flags) { return false; } ++static inline bool console_is_usable(struct console *con, short flags, ++ bool use_atomic) { return false; } + + #endif /* CONFIG_PRINTK */ + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 1becdfc7772c..bb071193ab6e 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -963,7 +963,7 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex + + cookie = console_srcu_read_lock(); + flags = console_srcu_read_flags(con); +- is_usable = console_is_usable(con, flags); ++ is_usable = console_is_usable(con, flags, false); + console_srcu_read_unlock(cookie); + + if (!is_usable) +@@ -1022,7 +1022,7 @@ static int nbcon_kthread_func(void *__console) + + con_flags = console_srcu_read_flags(con); + +- if (console_is_usable(con, con_flags)) { ++ if (console_is_usable(con, con_flags, false)) { + con->driver_enter(con, &flags); + + /* +@@ -1203,7 +1203,7 @@ static void __nbcon_atomic_flush_all(u64 stop_seq, bool allow_unsafe_takeover) + if (!(flags & CON_NBCON)) + continue; + +- if (!console_is_usable(con, flags)) ++ if (!console_is_usable(con, flags, true)) + continue; + + if (nbcon_seq_read(con) >= stop_seq) +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 904cff9b359d..ee0e42b64d56 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3046,7 +3046,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + if ((flags & CON_NBCON) && con->kthread) + continue; + +- if (!console_is_usable(con, flags)) ++ if (!console_is_usable(con, flags, true)) + continue; + any_usable = true; + +@@ -3976,8 +3976,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + * that they make forward progress, so only increment + * @diff for usable consoles. + */ +- if (!console_is_usable(c, flags)) ++ if (!console_is_usable(c, flags, true) && ++ !console_is_usable(c, flags, false)) { + continue; ++ } + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(c); +-- +2.51.0 + +From c02887482bd8b3cc3921a0e11769840bd3bbc24a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 26 Sep 2023 13:03:52 +0000 +Subject: [PATCH 146/213] printk: nbcon: Add printer thread wakeups + +Add a function to wakeup the printer threads. Use the new function +when: + + - records are added to the printk ringbuffer + - consoles are resumed + - triggered via printk_trigger_flush() + +The actual waking is performed via irq_work so that the wakeup can +be triggered from any context. 
+ +Co-developed-by: John Ogness +Signed-off-by: John Ogness +Signed-off-by: Thomas Gleixner (Intel) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 3 +++ + kernel/printk/internal.h | 1 + + kernel/printk/nbcon.c | 56 ++++++++++++++++++++++++++++++++++++++++ + kernel/printk/printk.c | 7 +++++ + 4 files changed, 67 insertions(+) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 83ffc60e61b1..f17a8b77bd90 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,6 +16,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -311,6 +312,7 @@ struct nbcon_write_context { + * @locked_port: True, if the port lock is locked by nbcon + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking ++ * @irq_work: Defer @kthread waking to IRQ work context + */ + struct console { + char name[16]; +@@ -344,6 +346,7 @@ struct console { + bool locked_port; + struct task_struct *kthread; + struct rcuwait rcuwait; ++ struct irq_work irq_work; + }; + + #ifdef CONFIG_LOCKDEP +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index e5eb7dc25e0a..578623e7f0fc 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -93,6 +93,7 @@ enum nbcon_prio nbcon_get_default_prio(void); + void nbcon_atomic_flush_all(void); + bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); + void nbcon_kthread_create(struct console *con); ++void nbcon_wake_threads(void); + + /* + * Check if the given console is currently capable and allowed to print +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index bb071193ab6e..337ec7a10095 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1052,6 +1052,61 @@ static int nbcon_kthread_func(void *__console) + goto wait_for_event; + } + +/** + * nbcon_irq_work - irq work to wake printk thread + * @irq_work: The irq work to operate on @@ -11921,6 +19930,9 @@ index 000000000..b4278854e + +static inline bool rcuwait_has_sleeper(struct rcuwait *w) +{ ++ bool has_sleeper; ++ ++ rcu_read_lock(); + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. @@ -11933,7 +19945,10 @@ index 000000000..b4278854e + * This pairs with nbcon_kthread_func:A. + */ + smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ -+ return rcuwait_active(w); ++ has_sleeper = !!rcu_dereference(w->task); ++ rcu_read_unlock(); ++ ++ return has_sleeper; +} + +/** @@ -11957,3218 +19972,5820 @@ index 000000000..b4278854e + console_srcu_read_unlock(cookie); +} + -+/* Track the nbcon emergency nesting per CPU. */ -+static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); -+static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; + /* Track the nbcon emergency nesting per CPU. 
*/ + static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); + static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; +@@ -1418,6 +1473,7 @@ void nbcon_init(struct console *con) + BUG_ON(!con->pbufs); + + rcuwait_init(&con->rcuwait); ++ init_irq_work(&con->irq_work, nbcon_irq_work); + nbcon_seq_force(con, con->seq); + nbcon_state_set(con, &state); + } +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index ee0e42b64d56..31a05ccc9008 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2409,6 +2409,8 @@ asmlinkage int vprintk_emit(int facility, int level, + } + } + ++ nbcon_wake_threads(); ++ + if (do_trylock_unlock) { + /* + * The caller may be holding system-critical or +@@ -2711,6 +2713,10 @@ void resume_console(void) + */ + synchronize_srcu(&console_srcu); + ++ /* ++ * Since this runs in task context, wake the threaded printers ++ * directly rather than scheduling irq_work to do it. ++ */ + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + flags = console_srcu_read_flags(con); +@@ -4128,6 +4134,7 @@ void defer_console_output(void) + + void printk_trigger_flush(void) + { ++ nbcon_wake_threads(); + defer_console_output(); + } + +-- +2.51.0 + +From 94576ea906888293182e8f20b83b1d35e2f3e2b1 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 26 Sep 2023 13:04:15 +0000 +Subject: [PATCH 147/213] printk: nbcon: Stop threads on shutdown/reboot + +Register a syscore_ops shutdown function to stop all threaded +printers on shutdown/reboot. This allows printk to transition back +to atomic printing in order to provide a robust mechanism for +outputting the final messages. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/nbcon.c | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 337ec7a10095..57ed4968a653 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include "printk_ringbuffer.h" + #include "internal.h" + /* +@@ -1577,3 +1578,33 @@ void nbcon_release(struct uart_port *up) + con->locked_port = false; + } + EXPORT_SYMBOL_GPL(nbcon_release); ++ ++/** ++ * printk_kthread_shutdown - shutdown all threaded printers ++ * ++ * On system shutdown all threaded printers are stopped. This allows printk ++ * to transition back to atomic printing, thus providing a robust mechanism ++ * for the final shutdown/reboot messages to be output. ++ */ ++static void printk_kthread_shutdown(void) ++{ ++ struct console *con; ++ ++ console_list_lock(); ++ for_each_console(con) { ++ if (con->flags & CON_NBCON) ++ nbcon_kthread_stop(con); ++ } ++ console_list_unlock(); ++} ++ ++static struct syscore_ops printk_syscore_ops = { ++ .shutdown = printk_kthread_shutdown, ++}; ++ ++static int __init printk_init_ops(void) ++{ ++ register_syscore_ops(&printk_syscore_ops); ++ return 0; ++} ++device_initcall(printk_init_ops); +-- +2.51.0 + +From 77106d346517034bf99472e33ef8077584813d3b Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 5 Dec 2023 14:09:31 +0000 +Subject: [PATCH 148/213] printk: nbcon: Start printing threads + +If there are no boot consoles, the printing threads are started +in early_initcall. + +If there are boot consoles, the printing threads are started +after the last boot console has unregistered. 
The printing +threads do not need to be concerned about boot consoles because +boot consoles cannot register once a non-boot console has +registered. + +Until a printing thread of a console has started, that console +will print using atomic_write() in the printk() caller context. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 2 ++ + kernel/printk/nbcon.c | 18 +++++++++++++++++- + kernel/printk/printk.c | 14 ++++++++++++++ + 3 files changed, 33 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 578623e7f0fc..dcf365635f71 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -44,6 +44,7 @@ enum printk_info_flags { + }; + + extern struct printk_ringbuffer *prb; ++extern bool printk_threads_enabled; + extern bool have_legacy_console; + extern bool have_boot_console; + +@@ -161,6 +162,7 @@ static inline void nbcon_kthread_wake(struct console *con) + + static inline void nbcon_kthread_wake(struct console *con) { } + static inline void nbcon_kthread_create(struct console *con) { } ++#define printk_threads_enabled (false) + #define printing_via_unlock (false) + + /* +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 57ed4968a653..b866d0138fe0 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -205,6 +205,8 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) + } + } + ++bool printk_threads_enabled __ro_after_init; ++ + /** + * nbcon_context_try_acquire_direct - Try to acquire directly + * @ctxt: The context of the caller +@@ -1401,7 +1403,7 @@ void nbcon_kthread_create(struct console *con) + if (!(con->flags & CON_NBCON) || !con->write_thread) + return; + +- if (con->kthread) ++ if (!printk_threads_enabled || con->kthread) + return; + + /* +@@ -1427,6 +1429,19 @@ void nbcon_kthread_create(struct console *con) + sched_set_normal(con->kthread, -20); + } + ++static int __init printk_setup_threads(void) ++{ ++ struct console *con; ++ ++ console_list_lock(); ++ printk_threads_enabled = true; ++ for_each_console(con) ++ nbcon_kthread_create(con); ++ console_list_unlock(); ++ return 0; ++} ++early_initcall(printk_setup_threads); + + /** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for +@@ -1477,6 +1492,7 @@ void nbcon_init(struct console *con) + init_irq_work(&con->irq_work, nbcon_irq_work); + nbcon_seq_force(con, con->seq); + nbcon_state_set(con, &state); ++ nbcon_kthread_create(con); + } + + /** +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 31a05ccc9008..08ddc8db7a0d 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2395,6 +2395,9 @@ asmlinkage int vprintk_emit(int facility, int level, + * + * - When this CPU is in panic. + * ++ * - When booting, before the printing threads have been ++ * started. ++ * + * - During shutdown, since the printing threads may not get + * a chance to print the final messages. + * +@@ -2404,6 +2407,7 @@ asmlinkage int vprintk_emit(int facility, int level, + * with boot consoles. + */ + if (is_panic_context || ++ !printk_threads_enabled || + (system_state > SYSTEM_RUNNING)) { + nbcon_atomic_flush_all(); + } +@@ -3716,6 +3720,7 @@ EXPORT_SYMBOL(register_console); + /* Must be called under console_list_lock(). 
*/ + static int unregister_console_locked(struct console *console) + { ++ bool is_boot_con = (console->flags & CON_BOOT); + bool found_legacy_con = false; + bool found_nbcon_con = false; + bool found_boot_con = false; +@@ -3787,6 +3792,15 @@ static int unregister_console_locked(struct console *console) + if (!found_nbcon_con) + have_nbcon_console = false; + ++ /* ++ * When the last boot console unregisters, start up the ++ * printing threads. ++ */ ++ if (is_boot_con && !have_boot_console) { ++ for_each_console(c) ++ nbcon_kthread_create(c); ++ } ++ + return res; + } + +-- +2.51.0 + +From d3a85bc58031040fcd8c6c0a47be9d4f281895c1 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Tue, 26 Sep 2023 13:31:00 +0000 +Subject: [PATCH 149/213] proc: Add nbcon support for /proc/consoles + +Update /proc/consoles output to show 'W' if an nbcon write +callback is implemented (write_atomic or write_thread). + +Also update /proc/consoles output to show 'N' if it is an +nbcon console. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/proc/consoles.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c +index e0758fe7936d..2703676549f5 100644 +--- a/fs/proc/consoles.c ++++ b/fs/proc/consoles.c +@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v) + { CON_ENABLED, 'E' }, + { CON_CONSDEV, 'C' }, + { CON_BOOT, 'B' }, ++ { CON_NBCON, 'N' }, + { CON_PRINTBUFFER, 'p' }, + { CON_BRL, 'b' }, + { CON_ANYTIME, 'a' }, + }; + char flags[ARRAY_SIZE(con_flags) + 1]; + struct console *con = v; ++ char con_write = '-'; + unsigned int a; + dev_t dev = 0; + +@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v) + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); +- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', +- con->write ? 'W' : '-', con->unblank ? 'U' : '-', +- flags); ++ if (con->flags & CON_NBCON) { ++ if (con->write_atomic || con->write_thread) ++ con_write = 'W'; ++ } else { ++ if (con->write) ++ con_write = 'W'; ++ } ++ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write, ++ con->unblank ? 'U' : '-', flags); + if (dev) + seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); + +-- +2.51.0 + +From 3e267ac56ae0584cca16430c3955b16a5849bb37 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 22 Sep 2023 14:31:09 +0000 +Subject: [PATCH 150/213] tty: sysfs: Add nbcon support for 'active' + +Allow the 'active' attribute to list nbcon consoles. 
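This change and the /proc/consoles change above encode the same writability rule. A self-contained sketch of that rule (user-space stand-in types; the CON_NBCON bit value is a placeholder, not the kernel's):

  #include <stdbool.h>
  #include <stddef.h>

  #define CON_NBCON 0x80	/* placeholder flag bit */

  struct con_model {
  	short flags;
  	void *write;		/* legacy callback */
  	void *write_atomic;	/* nbcon atomic callback */
  	void *write_thread;	/* nbcon threaded callback */
  	void *kthread;		/* printer thread, if started */
  };

  static bool con_can_write(const struct con_model *c)
  {
  	if (c->flags & CON_NBCON)
  		return c->write_atomic ||
  		       (c->write_thread && c->kthread);

  	return c->write != NULL;
  }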
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/tty_io.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c +index 117abcf366d9..03e2eaf24fa5 100644 +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -3543,8 +3543,15 @@ static ssize_t show_cons_active(struct device *dev, + for_each_console(c) { + if (!c->device) + continue; +- if (!c->write) +- continue; ++ if (c->flags & CON_NBCON) { ++ if (!c->write_atomic && ++ !(c->write_thread && c->kthread)) { ++ continue; ++ } ++ } else { ++ if (!c->write) ++ continue; ++ } + if ((c->flags & CON_ENABLED) == 0) + continue; + cs[i++] = c; +-- +2.51.0 + +From de0c42e8c4f787be3e12bc95c16a1ec54a50507c Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 20 Oct 2023 10:01:58 +0000 +Subject: [PATCH 151/213] printk: nbcon: Provide function to reacquire + ownership + +Contexts may become nbcon owners for various reasons, not just +for printing. Indeed, the port->lock wrapper takes ownership +for anything relating to the hardware. + +Since ownership can be lost at any time due to handover or +takeover, a context _should_ be prepared to back out +immediately and carefully. However, there are many scenarios +where the context _must_ reacquire ownership in order to +finalize or revert hardware changes. + +One such example is when interrupts are disabled by a context. +No other context will automagically re-enable the interrupts. +For this case, the disabling context _must_ reacquire nbcon +ownership so that it can re-enable the interrupts. + +Provide nbcon_reacquire() for exactly this purpose. + +Note that for printing contexts, after a successful reacquire +the context will have no output buffer because that has been +lost. nbcon_reacquire() cannot be used to resume printing. 
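The intended calling pattern looks roughly as follows, modeled on the 8250 thread callback later in this series (schematic only; the my_hw_* hooks are hypothetical, not kernel API):

  static bool my_write_thread(struct console *con,
  			    struct nbcon_write_context *wctxt)
  {
  	unsigned int saved;
  	bool done = false;

  	if (!nbcon_enter_unsafe(wctxt))
  		return false;

  	saved = my_hw_mask_irqs(con);	/* must be undone below */

  	if (nbcon_exit_unsafe(wctxt)) {
  		/* Ownership can be lost at any point in here. */
  		done = my_hw_emit(con, wctxt);
  	} else {
  		nbcon_reacquire(wctxt);
  	}

  	/*
  	 * Whether or not ownership was lost while emitting, become
  	 * the owner again and revert the hardware change before
  	 * returning. The output buffer may be gone, but the masked
  	 * interrupts still must be restored.
  	 */
  	while (!nbcon_enter_unsafe(wctxt))
  		nbcon_reacquire(wctxt);

  	my_hw_unmask_irqs(con, saved);

  	return nbcon_exit_unsafe(wctxt) && done;
  }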
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 2 ++ + kernel/printk/nbcon.c | 32 ++++++++++++++++++++++++++++++++ + 2 files changed, 34 insertions(+) + +diff --git a/include/linux/console.h b/include/linux/console.h +index f17a8b77bd90..756e5008d828 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -479,12 +479,14 @@ extern void nbcon_cpu_emergency_exit(void); + extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); + extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); + extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); ++extern void nbcon_reacquire(struct nbcon_write_context *wctxt); + #else + static inline void nbcon_cpu_emergency_enter(void) { } + static inline void nbcon_cpu_emergency_exit(void) { } + static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } + static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } + static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } ++static inline void nbcon_reacquire(struct nbcon_write_context *wctxt) { } + #endif + + extern int console_set_on_cmdline; +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index b866d0138fe0..f843df54ee82 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -830,6 +830,38 @@ bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) + } + EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + +/** -+ * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer ++ * nbcon_reacquire - Reacquire a console after losing ownership ++ * @wctxt: The write context that was handed to the write function + * -+ * Return: Either a pointer to the per CPU emergency nesting counter of -+ * the current CPU or to the init data during early boot. -+ */ -+static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) -+{ -+ /* -+ * The value of __printk_percpu_data_ready gets set in normal -+ * context and before SMP initialization. As a result it could -+ * never change while inside an nbcon emergency section. -+ */ -+ if (!printk_percpu_data_ready()) -+ return &early_nbcon_pcpu_emergency_nesting; -+ -+ return this_cpu_ptr(&nbcon_pcpu_emergency_nesting); -+} -+ -+/** -+ * nbcon_atomic_emit_one - Print one record for an nbcon console using the -+ * write_atomic() callback -+ * @wctxt: An initialized write context struct to use -+ * for this context ++ * Since ownership can be lost at any time due to handover or takeover, a ++ * printing context _should_ be prepared to back out immediately and ++ * carefully. However, there are many scenarios where the context _must_ ++ * reacquire ownership in order to finalize or revert hardware changes. + * -+ * Return: False if the given console could not print a record or there -+ * are no more records to print, otherwise true. ++ * This function allows a context to reacquire ownership using the same ++ * priority as its previous ownership. + * -+ * This is an internal helper to handle the locking of the console before -+ * calling nbcon_emit_next_record(). ++ * Note that for printing contexts, after a successful reacquire the ++ * context will have no output buffer because that has been lost. This ++ * function cannot be used to resume printing. 
+ */ -+static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt) ++void nbcon_reacquire(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ struct nbcon_state cur; + -+ if (!nbcon_context_try_acquire(ctxt)) -+ return false; -+ -+ /* -+ * nbcon_emit_next_record() returns false when the console was -+ * handed over or taken over. In both cases the context is no -+ * longer valid. -+ */ -+ if (!nbcon_emit_next_record(wctxt, true)) -+ return false; -+ -+ nbcon_context_release(ctxt); ++ while (!nbcon_context_try_acquire(ctxt)) ++ cpu_relax(); + -+ return ctxt->backlog; ++ wctxt->outbuf = NULL; ++ wctxt->len = 0; ++ nbcon_state_read(con, &cur); ++ wctxt->unsafe_takeover = cur.unsafe_takeover; +} ++EXPORT_SYMBOL_GPL(nbcon_reacquire); + -+/** -+ * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon -+ * printing on the current CPU -+ * -+ * Context: Any context which could not be migrated to another CPU. -+ * Return: The nbcon_prio to use for acquiring an nbcon console in this -+ * context for printing. -+ */ -+enum nbcon_prio nbcon_get_default_prio(void) + /** + * nbcon_emit_next_record - Emit a record in the acquired context + * @wctxt: The write context that will be handed to the write function +-- +2.51.0 + +From 4aca34da9d03ca21ac6a95efade7860065cea118 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 11 Dec 2023 09:19:18 +0000 +Subject: [PATCH 152/213] serial: core: Provide low-level functions to port + lock + +The nbcon console's driver_enter() and driver_exit() callbacks need +to lock the port lock in order to synchronize against other hardware +activity (such as adjusting baud rates). However, they cannot use +the uart_port_lock() wrappers because the printk subsystem will +perform nbcon locking after calling the driver_enter() callback. + +Provide low-level variants __uart_port_lock_irqsave() and +__uart_port_unlock_irqrestore() for this purpose. These are only +to be used by the driver_enter()/driver_exit() callbacks. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/serial_core.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index 6fc2003767a9..7c861b4606aa 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -682,6 +682,18 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo + spin_unlock_irqrestore(&up->lock, flags); + } + ++/* Only for use in the console->driver_enter() callback. */ ++static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ -+ unsigned int *cpu_emergency_nesting; -+ -+ if (this_cpu_in_panic()) -+ return NBCON_PRIO_PANIC; -+ -+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); -+ if (*cpu_emergency_nesting) -+ return NBCON_PRIO_EMERGENCY; -+ -+ return NBCON_PRIO_NORMAL; ++ spin_lock_irqsave(&up->lock, *flags); +} + -+/** -+ * nbcon_atomic_emit_next_record - Print one record for an nbcon console -+ * using the write_atomic() callback -+ * @con: The console to print on -+ * @handover: Will be set to true if a printk waiter has taken over the -+ * console_lock, in which case the caller is no longer holding -+ * both the console_lock and the SRCU read lock. Otherwise it -+ * is set to false. -+ * @cookie: The cookie from the SRCU read lock. -+ * -+ * Context: Any context which could not be migrated to another CPU. 
-+ * Return: True if a record could be printed, otherwise false. -+ * -+ * This function is meant to be called by console_flush_all() to print records -+ * on nbcon consoles using the write_atomic() callback. Essentially it is the -+ * nbcon version of console_emit_next_record(). -+ */ -+bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie) ++/* Only for use in the console->driver_exit() callback. */ ++static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ -+ struct nbcon_write_context wctxt = { }; -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); -+ unsigned long driver_flags; -+ bool progress = false; -+ unsigned long flags; -+ -+ *handover = false; -+ -+ /* Use the same locking order as console_emit_next_record(). */ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ printk_safe_enter_irqsave(flags); -+ console_lock_spinning_enable(); -+ stop_critical_timings(); -+ } -+ -+ con->driver_enter(con, &driver_flags); -+ cant_migrate(); -+ -+ ctxt->console = con; -+ ctxt->prio = nbcon_get_default_prio(); -+ -+ progress = nbcon_atomic_emit_one(&wctxt); -+ -+ con->driver_exit(con, driver_flags); -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ start_critical_timings(); -+ *handover = console_lock_spinning_disable_and_check(cookie); -+ printk_safe_exit_irqrestore(flags); -+ } -+ -+ return progress; ++ spin_unlock_irqrestore(&up->lock, flags); +} + -+/** -+ * __nbcon_atomic_flush_all - Flush all nbcon consoles using their -+ * write_atomic() callback -+ * @stop_seq: Flush up until this record -+ * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers -+ */ -+static void __nbcon_atomic_flush_all(u64 stop_seq, bool allow_unsafe_takeover) + static inline int serial_port_in(struct uart_port *up, int offset) + { + return up->serial_in(up, offset); +-- +2.51.0 + +From 788df93e5f414bc03170db154aa97ad2057db9ac Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Wed, 13 Sep 2023 15:30:36 +0000 +Subject: [PATCH 153/213] serial: 8250: Switch to nbcon console + +Implement the necessary callbacks to switch the 8250 console driver +to perform as an nbcon console. + +Add implementations for the nbcon consoles (write_atomic, write_thread, +driver_enter, driver_exit) and add CON_NBCON to the initial flags. + +The legacy code is kept in order to easily switch back to legacy mode +by defining CONFIG_SERIAL_8250_LEGACY_CONSOLE. 
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/8250/8250_core.c | 42 +++++++- + drivers/tty/serial/8250/8250_port.c | 159 +++++++++++++++++++++++++++- + include/linux/serial_8250.h | 6 ++ + 3 files changed, 204 insertions(+), 3 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c +index 677b4aaf1f0c..efb6f1c1926a 100644 +--- a/drivers/tty/serial/8250/8250_core.c ++++ b/drivers/tty/serial/8250/8250_core.c +@@ -592,6 +592,7 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) + + #ifdef CONFIG_SERIAL_8250_CONSOLE + ++#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE + static void univ8250_console_write(struct console *co, const char *s, + unsigned int count) + { +@@ -599,6 +600,37 @@ static void univ8250_console_write(struct console *co, const char *s, + + serial8250_console_write(up, s, count); + } ++#else ++static bool univ8250_console_write_atomic(struct console *co, ++ struct nbcon_write_context *wctxt) +{ -+ struct nbcon_write_context wctxt = { }; -+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); -+ struct console *con; -+ bool any_progress; -+ int cookie; -+ -+ do { -+ any_progress = false; -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ short flags = console_srcu_read_flags(con); -+ unsigned long irq_flags; -+ -+ if (!(flags & CON_NBCON)) -+ continue; ++ struct uart_8250_port *up = &serial8250_ports[co->index]; + -+ if (!console_is_usable(con, flags, true)) -+ continue; ++ return serial8250_console_write_atomic(up, wctxt); ++} + -+ if (nbcon_seq_read(con) >= stop_seq) -+ continue; ++static bool univ8250_console_write_thread(struct console *co, ++ struct nbcon_write_context *wctxt) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; + -+ memset(ctxt, 0, sizeof(*ctxt)); -+ ctxt->console = con; -+ ctxt->spinwait_max_us = 2000; -+ ctxt->allow_unsafe_takeover = allow_unsafe_takeover; ++ return serial8250_console_write_thread(up, wctxt); ++} + -+ /* -+ * Atomic flushing does not use console driver -+ * synchronization (i.e. it does not hold the port -+ * lock for uart consoles). Therefore IRQs must be -+ * disabled to avoid being interrupted and then -+ * calling into a driver that will deadlock trying -+ * acquire console ownership. -+ * -+ * This also disables migration in order to get the -+ * current CPU priority. 
-+ */ -+ local_irq_save(irq_flags); ++static void univ8250_console_driver_enter(struct console *con, unsigned long *flags) ++{ ++ struct uart_port *up = &serial8250_ports[con->index].port; + -+ ctxt->prio = nbcon_get_default_prio(); ++ __uart_port_lock_irqsave(up, flags); ++} + -+ any_progress |= nbcon_atomic_emit_one(&wctxt); ++static void univ8250_console_driver_exit(struct console *con, unsigned long flags) ++{ ++ struct uart_port *up = &serial8250_ports[con->index].port; + -+ local_irq_restore(irq_flags); -+ } -+ console_srcu_read_unlock(cookie); -+ } while (any_progress); ++ __uart_port_unlock_irqrestore(up, flags); +} ++#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */ + + static int univ8250_console_setup(struct console *co, char *options) + { +@@ -698,12 +730,20 @@ static int univ8250_console_match(struct console *co, char *name, int idx, + + static struct console univ8250_console = { + .name = "ttyS", ++#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE + .write = univ8250_console_write, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME, ++#else ++ .write_atomic = univ8250_console_write_atomic, ++ .write_thread = univ8250_console_write_thread, ++ .driver_enter = univ8250_console_driver_enter, ++ .driver_exit = univ8250_console_driver_exit, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON, ++#endif + .device = uart_console_device, + .setup = univ8250_console_setup, + .exit = univ8250_console_exit, + .match = univ8250_console_match, +- .flags = CON_PRINTBUFFER | CON_ANYTIME, + .index = -1, + .data = &serial8250_reg, + }; +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index 6935bbf8ea28..f741f788bdf0 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -557,6 +557,11 @@ static int serial8250_em485_init(struct uart_8250_port *p) + if (!p->em485) + return -ENOMEM; + ++#ifndef CONFIG_SERIAL_8250_LEGACY_CONSOLE ++ if (uart_console(&p->port)) ++ dev_warn(p->port.dev, "no atomic printing for rs485 consoles\n"); ++#endif + -+/** -+ * nbcon_atomic_flush_all - Flush all nbcon consoles using their -+ * write_atomic() callback -+ * -+ * Flush the backlog up through the currently newest record. Any new -+ * records added while flushing will not be flushed. This is to avoid -+ * one CPU printing unbounded because other CPUs continue to add records. + hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, +@@ -709,7 +714,11 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial8250_rpm_put(p); + } + +-static void serial8250_clear_IER(struct uart_8250_port *up) ++/* ++ * Only to be used by write_atomic() and the legacy write(), which do not ++ * require port lock. + */ -+void nbcon_atomic_flush_all(void) ++static void __serial8250_clear_IER(struct uart_8250_port *up) + { + if (up->capabilities & UART_CAP_UUE) + serial_out(up, UART_IER, UART_IER_UUE); +@@ -717,6 +726,14 @@ static void serial8250_clear_IER(struct uart_8250_port *up) + serial_out(up, UART_IER, 0); + } + ++static inline void serial8250_clear_IER(struct uart_8250_port *up) +{ -+ __nbcon_atomic_flush_all(prb_next_reserve_seq(prb), false); -+} ++ /* Port locked to synchronize UART_IER access against the console. 
*/ ++ lockdep_assert_held_once(&up->port.lock); + -+/** -+ * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their -+ * write_atomic() callback and allowing unsafe hostile takeovers -+ * -+ * Flush the backlog up through the currently newest record. Unsafe hostile -+ * takeovers will be performed, if necessary. -+ */ -+void nbcon_atomic_flush_unsafe(void) -+{ -+ __nbcon_atomic_flush_all(prb_next_reserve_seq(prb), true); ++ __serial8250_clear_IER(up); +} + -+/** -+ * nbcon_cpu_emergency_enter - Enter an emergency section where printk() -+ * messages for that CPU are only stored -+ * -+ * Upon exiting the emergency section, all stored messages are flushed. -+ * -+ * Context: Any context. Disables preemption. -+ * -+ * When within an emergency section, no printing occurs on that CPU. This -+ * is to allow all emergency messages to be dumped into the ringbuffer before -+ * flushing the ringbuffer. The actual printing occurs when exiting the -+ * outermost emergency section. -+ */ -+void nbcon_cpu_emergency_enter(void) + #ifdef CONFIG_SERIAL_8250_RSA + /* + * Attempts to turn on the RSA FIFO. Returns zero on failure. +@@ -3331,6 +3348,11 @@ static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) + + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out(port, UART_TX, ch); ++ ++ if (ch == '\n') ++ up->console_newline_needed = false; ++ else ++ up->console_newline_needed = true; + } + + /* +@@ -3359,6 +3381,7 @@ static void serial8250_console_restore(struct uart_8250_port *up) + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); + } + ++#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE + static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) + { + unsigned int i; +@@ -3436,7 +3459,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + * First save the IER then disable the interrupts + */ + ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ __serial8250_clear_IER(up); + + /* check scratch reg to see if port powered off during system sleep */ + if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { +@@ -3502,6 +3525,135 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + if (locked) + uart_port_unlock_irqrestore(port, flags); + } ++#else ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct nbcon_write_context *wctxt) +{ -+ unsigned int *cpu_emergency_nesting; ++ struct uart_8250_em485 *em485 = up->em485; ++ struct uart_port *port = &up->port; ++ bool done = false; ++ unsigned int ier; + -+ preempt_disable(); ++ touch_nmi_watchdog(); + -+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); -+ (*cpu_emergency_nesting)++; -+} ++ if (!nbcon_enter_unsafe(wctxt)) ++ return false; + -+/** -+ * nbcon_cpu_emergency_exit - Exit an emergency section and flush the -+ * stored messages -+ * -+ * Flushing only occurs when exiting all nesting for the CPU. -+ * -+ * Context: Any context. Enables preemption. -+ */ -+void nbcon_cpu_emergency_exit(void) -+{ -+ unsigned int *cpu_emergency_nesting; -+ bool do_trigger_flush = false; ++ /* First save IER then disable the interrupts. */ ++ ier = serial_port_in(port, UART_IER); ++ serial8250_clear_IER(up); + -+ cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); ++ /* Check scratch reg if port powered off during system sleep. 
*/ ++ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { ++ serial8250_console_restore(up); ++ up->canary = 0; ++ } + -+ WARN_ON_ONCE(*cpu_emergency_nesting == 0); ++ if (em485) { ++ if (em485->tx_stopped) ++ up->rs485_start_tx(up); ++ mdelay(port->rs485.delay_rts_before_send); ++ } + -+ if (*cpu_emergency_nesting == 1) -+ do_trigger_flush = true; ++ if (nbcon_exit_unsafe(wctxt)) { ++ int len = READ_ONCE(wctxt->len); ++ int i; + -+ /* Undo the nesting count of nbcon_cpu_emergency_enter(). */ -+ (*cpu_emergency_nesting)--; ++ /* ++ * Write out the message. Toggle unsafe for each byte in order ++ * to give another (higher priority) context the opportunity ++ * for a friendly takeover. If such a takeover occurs, this ++ * context must reacquire ownership in order to perform final ++ * actions (such as re-enabling the interrupts). ++ * ++ * IMPORTANT: wctxt->outbuf and wctxt->len are no longer valid ++ * after a reacquire so writing the message must be ++ * aborted. ++ */ ++ for (i = 0; i < len; i++) { ++ if (!nbcon_enter_unsafe(wctxt)) { ++ nbcon_reacquire(wctxt); ++ break; ++ } + -+ preempt_enable(); ++ uart_console_write(port, wctxt->outbuf + i, 1, serial8250_console_putchar); + -+ if (do_trigger_flush) -+ printk_trigger_flush(); -+} ++ if (!nbcon_exit_unsafe(wctxt)) { ++ nbcon_reacquire(wctxt); ++ break; ++ } ++ } ++ done = (i == len); ++ } else { ++ nbcon_reacquire(wctxt); ++ } + -+/** -+ * nbcon_kthread_stop - Stop a printer thread -+ * @con: Console to operate on -+ */ -+static void nbcon_kthread_stop(struct console *con) -+{ -+ lockdep_assert_console_list_lock_held(); ++ while (!nbcon_enter_unsafe(wctxt)) ++ nbcon_reacquire(wctxt); + -+ if (!con->kthread) -+ return; ++ /* Finally, wait for transmitter to become empty and restore IER. */ ++ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); ++ if (em485) { ++ mdelay(port->rs485.delay_rts_after_send); ++ if (em485->tx_stopped) ++ up->rs485_stop_tx(up); ++ } ++ serial_port_out(port, UART_IER, ier); + -+ kthread_stop(con->kthread); -+ con->kthread = NULL; ++ /* ++ * The receive handling will happen properly because the receive ready ++ * bit will still be set; it is not cleared on read. However, modem ++ * control will not, we must call it if we have saved something in the ++ * saved flags while processing with interrupts off. ++ */ ++ if (up->msr_saved_flags) ++ serial8250_modem_status(up); ++ ++ /* Success if no handover/takeover and message fully printed. */ ++ return (nbcon_exit_unsafe(wctxt) && done); +} + -+/** -+ * nbcon_kthread_create - Create a printer thread -+ * @con: Console to operate on -+ * -+ * If it fails, let the console proceed. The atomic part might -+ * be usable and useful. -+ */ -+void nbcon_kthread_create(struct console *con) ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct nbcon_write_context *wctxt) +{ -+ struct task_struct *kt; ++ struct uart_port *port = &up->port; ++ unsigned int ier; + -+ lockdep_assert_console_list_lock_held(); ++ /* Atomic console not supported for rs485 mode. */ ++ if (up->em485) ++ return false; + -+ if (!(con->flags & CON_NBCON) || !con->write_thread) -+ return; ++ touch_nmi_watchdog(); + -+ if (!printk_threads_enabled || con->kthread) -+ return; ++ if (!nbcon_enter_unsafe(wctxt)) ++ return false; + + /* -+ * Printer threads cannot be started as long as any boot console is -+ * registered because there is no way to synchronize the hardware -+ * registers between boot console code and regular console code. ++ * First save IER then disable the interrupts. 
The special variant to ++ * clear IER is used because atomic printing may occur without holding ++ * the port lock. + */ -+ if (have_boot_console) -+ return; ++ ier = serial_port_in(port, UART_IER); ++ __serial8250_clear_IER(up); + -+ kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); -+ if (IS_ERR(kt)) { -+ con_printk(KERN_ERR, con, "failed to start printing thread\n"); -+ return; ++ /* Check scratch reg if port powered off during system sleep. */ ++ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { ++ serial8250_console_restore(up); ++ up->canary = 0; + } + -+ con->kthread = kt; ++ if (up->console_newline_needed) ++ uart_console_write(port, "\n", 1, serial8250_console_putchar); ++ uart_console_write(port, wctxt->outbuf, wctxt->len, serial8250_console_putchar); + -+ /* -+ * It is important that console printing threads are scheduled -+ * shortly after a printk call and with generous runtime budgets. -+ */ -+ sched_set_normal(con->kthread, -20); ++ /* Finally, wait for transmitter to become empty and restore IER. */ ++ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); ++ serial_port_out(port, UART_IER, ier); ++ ++ /* Success if no handover/takeover. */ ++ return nbcon_exit_unsafe(wctxt); +} ++#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */ + + static unsigned int probe_baud(struct uart_port *port) + { +@@ -3520,6 +3672,7 @@ static unsigned int probe_baud(struct uart_port *port) + + int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + { ++ struct uart_8250_port *up = up_to_u8250p(port); + int baud = 9600; + int bits = 8; + int parity = 'n'; +@@ -3529,6 +3682,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + if (!port->iobase && !port->membase) + return -ENODEV; + ++ up->console_newline_needed = false; + -+static int __init printk_setup_threads(void) -+{ -+ struct console *con; + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else if (probe) +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index be65de65fe61..ec46e3b49ee9 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -153,6 +153,8 @@ struct uart_8250_port { + #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; + ++ bool console_newline_needed; + -+ console_list_lock(); -+ printk_threads_enabled = true; -+ for_each_console(con) -+ nbcon_kthread_create(con); + struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; + +@@ -204,6 +206,10 @@ void serial8250_init_port(struct uart_8250_port *up); + void serial8250_set_defaults(struct uart_8250_port *up); + void serial8250_console_write(struct uart_8250_port *up, const char *s, + unsigned int count); ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct nbcon_write_context *wctxt); ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct nbcon_write_context *wctxt); + int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); + +-- +2.51.0 + +From 73ea360f7d912866398e0703d1e461add16c1f87 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 22 Sep 2023 17:35:04 +0000 +Subject: [PATCH 154/213] printk: Add kthread for all legacy consoles + +The write callback of legacy consoles make use of spinlocks. +This is not permitted with PREEMPT_RT in atomic contexts. 
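To make the conflict concrete, a typical legacy callback looks like this (schematic; my_port and my_port_emit are hypothetical):

  static void legacy_write(struct console *con, const char *s,
  			 unsigned int count)
  {
  	struct my_port *port = con->data;
  	unsigned long flags;

  	/*
  	 * On PREEMPT_RT a spinlock is an rt_mutex underneath, so
  	 * this can sleep and must not be called from atomic
  	 * context, e.g. from a printk() in a hard interrupt
  	 * handler.
  	 */
  	spin_lock_irqsave(&port->lock, flags);
  	my_port_emit(port, s, count);
  	spin_unlock_irqrestore(&port->lock, flags);
  }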
+ +Create a new kthread to handle printing of all the legacy +consoles (and nbcon consoles if boot consoles are registered). + +Since the consoles are printing in a task context, it is no +longer appropriate to support the legacy handover mechanism. + +These changes exist only for CONFIG_PREEMPT_RT. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 1 + + kernel/printk/nbcon.c | 18 ++- + kernel/printk/printk.c | 237 ++++++++++++++++++++++++++++++++------- + 3 files changed, 210 insertions(+), 46 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index dcf365635f71..7db6992c54f3 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -95,6 +95,7 @@ void nbcon_atomic_flush_all(void); + bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie); + void nbcon_kthread_create(struct console *con); + void nbcon_wake_threads(void); ++void nbcon_legacy_kthread_create(void); + + /* + * Check if the given console is currently capable and allowed to print +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index f843df54ee82..1b1b585b1675 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1247,9 +1247,11 @@ bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cook + *handover = false; + + /* Use the same locking order as console_emit_next_record(). */ +- printk_safe_enter_irqsave(flags); +- console_lock_spinning_enable(); +- stop_critical_timings(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ printk_safe_enter_irqsave(flags); ++ console_lock_spinning_enable(); ++ stop_critical_timings(); ++ } + + con->driver_enter(con, &driver_flags); + cant_migrate(); +@@ -1261,9 +1263,11 @@ bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cook + + con->driver_exit(con, driver_flags); + +- start_critical_timings(); +- *handover = console_lock_spinning_disable_and_check(cookie); +- printk_safe_exit_irqrestore(flags); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ start_critical_timings(); ++ *handover = console_lock_spinning_disable_and_check(cookie); ++ printk_safe_exit_irqrestore(flags); ++ } + + return progress; + } +@@ -1469,6 +1473,8 @@ static int __init printk_setup_threads(void) + printk_threads_enabled = true; + for_each_console(con) + nbcon_kthread_create(con); + if (IS_ENABLED(CONFIG_PREEMPT_RT) && printing_via_unlock) + nbcon_legacy_kthread_create(); -+ console_list_unlock(); -+ return 0; -+} -+early_initcall(printk_setup_threads); + console_list_unlock(); + return 0; + } +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 08ddc8db7a0d..aaafaa15f534 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -487,6 +487,9 @@ bool have_boot_console; + + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); + -+/** -+ * nbcon_alloc - Allocate buffers needed by the nbcon console -+ * @con: Console to allocate buffers for -+ * -+ * Return: True on success. False otherwise and the console cannot -+ * be used. -+ * -+ * This is not part of nbcon_init() because buffer allocation must -+ * be performed earlier in the console registration process. -+ */ -+bool nbcon_alloc(struct console *con) -+{ -+ if (con->flags & CON_BOOT) { -+ /* -+ * Boot console printing is synchronized with legacy console -+ * printing, so boot consoles can share the same global printk -+ * buffers. 
-+ */ -+ con->pbufs = &printk_shared_pbufs; -+ } else { -+ con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); -+ if (!con->pbufs) { -+ con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); -+ return false; -+ } -+ } ++static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); + -+ return true; -+} + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ + static u64 syslog_seq; +@@ -2351,7 +2354,8 @@ asmlinkage int vprintk_emit(int facility, int level, + const struct dev_printk_info *dev_info, + const char *fmt, va_list args) + { +- bool do_trylock_unlock = printing_via_unlock; ++ bool do_trylock_unlock = printing_via_unlock && ++ !IS_ENABLED(CONFIG_PREEMPT_RT); + int printed_len; + + /* Suppress unimportant messages after panic happens */ +@@ -2479,6 +2483,14 @@ EXPORT_SYMBOL(_printk); + static bool pr_flush(int timeout_ms, bool reset_on_progress); + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); + ++static struct task_struct *nbcon_legacy_kthread; + -+/** -+ * nbcon_init - Initialize the nbcon console specific data -+ * @con: Console to initialize -+ * -+ * nbcon_alloc() *must* be called and succeed before this function -+ * is called. -+ * -+ * This function expects that the legacy @con->seq has been set. -+ */ -+void nbcon_init(struct console *con) ++static inline void wake_up_legacy_kthread(void) +{ -+ struct nbcon_state state = { }; -+ -+ /* nbcon_alloc() must have been called and successful! */ -+ BUG_ON(!con->pbufs); -+ -+ rcuwait_init(&con->rcuwait); -+ init_irq_work(&con->irq_work, nbcon_irq_work); -+ nbcon_seq_force(con, con->seq); -+ nbcon_state_set(con, &state); -+ nbcon_kthread_create(con); ++ if (nbcon_legacy_kthread) ++ wake_up_interruptible(&legacy_wait); +} + -+/** -+ * nbcon_free - Free and cleanup the nbcon console specific data -+ * @con: Console to free/cleanup nbcon data -+ */ -+void nbcon_free(struct console *con) -+{ -+ struct nbcon_state state = { }; -+ -+ nbcon_kthread_stop(con); -+ nbcon_state_set(con, &state); -+ -+ /* Boot consoles share global printk buffers. */ -+ if (!(con->flags & CON_BOOT)) -+ kfree(con->pbufs); -+ -+ con->pbufs = NULL; -+} + #else /* CONFIG_PRINTK */ + + #define printk_time false +@@ -2492,6 +2504,8 @@ static u64 syslog_seq; + static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } + ++static inline void nbcon_legacy_kthread_create(void) { } ++static inline void wake_up_legacy_kthread(void) { } + #endif /* CONFIG_PRINTK */ + + #ifdef CONFIG_EARLY_PRINTK +@@ -2729,6 +2743,8 @@ void resume_console(void) + } + console_srcu_read_unlock(cookie); + ++ wake_up_legacy_kthread(); + -+static inline bool uart_is_nbcon(struct uart_port *up) -+{ -+ int cookie; -+ bool ret; + pr_flush(1000, true); + } + +@@ -2743,7 +2759,8 @@ void resume_console(void) + */ + static int console_cpu_notify(unsigned int cpu) + { +- if (!cpuhp_tasks_frozen && printing_via_unlock) { ++ if (!cpuhp_tasks_frozen && printing_via_unlock && ++ !IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); +@@ -2968,31 +2985,43 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co + con->dropped = 0; + } + +- /* +- * While actively printing out messages, if another printk() +- * were to occur on another CPU, it may wait for this one to +- * finish. 
This task can not be preempted if there is a +- * waiter waiting to take over. +- * +- * Interrupts are disabled because the hand over to a waiter +- * must not be interrupted until the hand over is completed +- * (@console_waiter is cleared). +- */ +- printk_safe_enter_irqsave(flags); +- console_lock_spinning_enable(); ++ /* Write everything out to the hardware. */ + +- /* Do not trace print latency. */ +- stop_critical_timings(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ /* ++ * On PREEMPT_RT this function is either in a thread or ++ * panic context. So there is no need for concern about ++ * printk reentrance or handovers. ++ */ + +- /* Write everything out to the hardware. */ +- con->write(con, outbuf, pmsg.outbuf_len); ++ con->write(con, outbuf, pmsg.outbuf_len); ++ con->seq = pmsg.seq + 1; ++ } else { ++ /* ++ * While actively printing out messages, if another printk() ++ * were to occur on another CPU, it may wait for this one to ++ * finish. This task can not be preempted if there is a ++ * waiter waiting to take over. ++ * ++ * Interrupts are disabled because the hand over to a waiter ++ * must not be interrupted until the hand over is completed ++ * (@console_waiter is cleared). ++ */ ++ printk_safe_enter_irqsave(flags); ++ console_lock_spinning_enable(); + +- start_critical_timings(); ++ /* Do not trace print latency. */ ++ stop_critical_timings(); + +- con->seq = pmsg.seq + 1; ++ con->write(con, outbuf, pmsg.outbuf_len); + +- *handover = console_lock_spinning_disable_and_check(cookie); +- printk_safe_exit_irqrestore(flags); ++ start_critical_timings(); + -+ if (!uart_console(up)) -+ return false; ++ con->seq = pmsg.seq + 1; + -+ cookie = console_srcu_read_lock(); -+ ret = (console_srcu_read_flags(up->cons) & CON_NBCON); -+ console_srcu_read_unlock(cookie); -+ return ret; -+} ++ *handover = console_lock_spinning_disable_and_check(cookie); ++ printk_safe_exit_irqrestore(flags); ++ } + skip: + return true; + } +@@ -3102,19 +3131,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + return false; + } + +-/** +- * console_unlock - unblock the console subsystem from printing +- * +- * Releases the console_lock which the caller holds to block printing of +- * the console subsystem. +- * +- * While the console_lock was held, console output may have been buffered +- * by printk(). If this is the case, console_unlock(); emits +- * the output prior to releasing the lock. +- * +- * console_unlock(); may be called from any context. +- */ +-void console_unlock(void) ++static void console_flush_and_unlock(void) + { + bool do_cond_resched; + bool handover; +@@ -3158,6 +3175,32 @@ void console_unlock(void) + */ + } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); + } + +/** -+ * nbcon_acquire - The second half of the port locking wrapper -+ * @up: The uart port whose @lock was locked ++ * console_unlock - unblock the console subsystem from printing + * -+ * The uart_port_lock() wrappers will first lock the spin_lock @up->lock. -+ * Then this function is called to implement nbcon-specific processing. ++ * Releases the console_lock which the caller holds to block printing of ++ * the console subsystem. + * -+ * If @up is an nbcon console, this console will be acquired and marked as -+ * unsafe. Otherwise this function does nothing. ++ * While the console_lock was held, console output may have been buffered ++ * by printk(). If this is the case, console_unlock(); emits ++ * the output prior to releasing the lock. 
+ * -+ * nbcon consoles acquired via the port lock wrapper always use priority -+ * NBCON_PRIO_NORMAL. ++ * console_unlock(); may be called from any context. + */ -+void nbcon_acquire(struct uart_port *up) ++void console_unlock(void) +{ -+ struct console *con = up->cons; -+ struct nbcon_context ctxt; -+ -+ if (!uart_is_nbcon(up)) ++ /* ++ * PREEMPT_RT relies on kthread and atomic consoles for printing. ++ * It never attempts to print from console_unlock(). ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ __console_unlock(); + return; ++ } + -+ WARN_ON_ONCE(up->nbcon_locked_port); ++ console_flush_and_unlock(); ++} + EXPORT_SYMBOL(console_unlock); + + /** +@@ -3377,11 +3420,106 @@ void console_start(struct console *console) + + if (flags & CON_NBCON) + nbcon_kthread_wake(console); ++ else ++ wake_up_legacy_kthread(); + + __pr_flush(console, 1000, true); + } + EXPORT_SYMBOL(console_start); + ++#ifdef CONFIG_PRINTK ++static bool printer_should_wake(void) ++{ ++ bool available = false; ++ struct console *con; ++ int cookie; + -+ do { -+ do { -+ memset(&ctxt, 0, sizeof(ctxt)); -+ ctxt.console = con; -+ ctxt.prio = NBCON_PRIO_NORMAL; -+ } while (!nbcon_context_try_acquire(&ctxt)); ++ if (kthread_should_stop()) ++ return true; + -+ } while (!nbcon_context_enter_unsafe(&ctxt)); ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); ++ u64 printk_seq; + -+ up->nbcon_locked_port = true; -+} -+EXPORT_SYMBOL_GPL(nbcon_acquire); ++ /* ++ * The legacy printer thread is only for legacy consoles, ++ * unless the nbcon console has no kthread printer. ++ */ ++ if ((flags & CON_NBCON) && con->kthread) ++ continue; + -+/** -+ * nbcon_release - The first half of the port unlocking wrapper -+ * @up: The uart port whose @lock is about to be unlocked -+ * -+ * The uart_port_unlock() wrappers will first call this function to implement -+ * nbcon-specific processing. Then afterwards the uart_port_unlock() wrappers -+ * will unlock the spin_lock @up->lock. -+ * -+ * If @up is an nbcon console, the console will be marked as safe and -+ * released. Otherwise this function does nothing. -+ * -+ * nbcon consoles acquired via the port lock wrapper always use priority -+ * NBCON_PRIO_NORMAL. -+ */ -+void nbcon_release(struct uart_port *up) -+{ -+ struct console *con = up->cons; -+ struct nbcon_context ctxt = { -+ .console = con, -+ .prio = NBCON_PRIO_NORMAL, -+ }; ++ if (!console_is_usable(con, flags, true)) ++ continue; + -+ if (!up->nbcon_locked_port) -+ return; ++ if (flags & CON_NBCON) { ++ printk_seq = nbcon_seq_read(con); ++ } else { ++ /* ++ * It is safe to read @seq because only this ++ * thread context updates @seq. ++ */ ++ printk_seq = con->seq; ++ } + -+ if (nbcon_context_exit_unsafe(&ctxt)) -+ nbcon_context_release(&ctxt); ++ if (prb_read_valid(prb, printk_seq, NULL)) { ++ available = true; ++ break; ++ } ++ } ++ console_srcu_read_unlock(cookie); + -+ up->nbcon_locked_port = false; ++ return available; +} -+EXPORT_SYMBOL_GPL(nbcon_release); + -+/** -+ * printk_kthread_shutdown - shutdown all threaded printers -+ * -+ * On system shutdown all threaded printers are stopped. This allows printk -+ * to transition back to atomic printing, thus providing a robust mechanism -+ * for the final shutdown/reboot messages to be output. 
-+ */ -+static void printk_kthread_shutdown(void) ++static int nbcon_legacy_kthread_func(void *unused) +{ -+ struct console *con; ++ int error; + -+ console_list_lock(); -+ for_each_console(con) { -+ if (con->flags & CON_NBCON) -+ nbcon_kthread_stop(con); -+ } -+ console_list_unlock(); -+} ++ for (;;) { ++ error = wait_event_interruptible(legacy_wait, printer_should_wake()); + -+static struct syscore_ops printk_syscore_ops = { -+ .shutdown = printk_kthread_shutdown, -+}; ++ if (kthread_should_stop()) ++ break; + -+static int __init printk_init_ops(void) -+{ -+ register_syscore_ops(&printk_syscore_ops); -+ return 0; -+} -+device_initcall(printk_init_ops); -diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index dcdf44961..80ccfbb6b 100644 ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -102,12 +102,6 @@ DEFINE_STATIC_SRCU(console_srcu); - */ - int __read_mostly suppress_printk; - --/* -- * During panic, heavy printk by other CPUs can delay the -- * panic and risk deadlock on console resources. -- */ --static int __read_mostly suppress_panic_printk; -- - #ifdef CONFIG_LOCKDEP - static struct lockdep_map console_lock_dep_map = { - .name = "console_lock" -@@ -288,6 +282,7 @@ EXPORT_SYMBOL(console_list_unlock); - * Return: A cookie to pass to console_srcu_read_unlock(). - */ - int console_srcu_read_lock(void) -+ __acquires(&console_srcu) - { - return srcu_read_lock_nmisafe(&console_srcu); - } -@@ -301,6 +296,7 @@ EXPORT_SYMBOL(console_srcu_read_lock); - * Counterpart to console_srcu_read_lock() - */ - void console_srcu_read_unlock(int cookie) -+ __releases(&console_srcu) - { - srcu_read_unlock_nmisafe(&console_srcu, cookie); - } -@@ -353,6 +349,29 @@ static bool panic_in_progress(void) - return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); - } - -+/* Return true if a panic is in progress on the current CPU. */ -+bool this_cpu_in_panic(void) -+{ -+ /* -+ * We can use raw_smp_processor_id() here because it is impossible for -+ * the task to be migrated to the panic_cpu, or away from it. If -+ * panic_cpu has already been set, and we're not currently executing on -+ * that CPU, then we never will be. -+ */ -+ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); -+} ++ if (error) ++ continue; + -+/* -+ * Return true if a panic is in progress on a remote CPU. -+ * -+ * On true, the local CPU should immediately release any printing resources -+ * that may be needed by the panic CPU. -+ */ -+bool other_cpu_in_panic(void) -+{ -+ return (panic_in_progress() && !this_cpu_in_panic()); ++ console_lock(); ++ console_flush_and_unlock(); ++ } ++ ++ return 0; +} + - /* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's -@@ -444,8 +463,33 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; - /* syslog_lock protects syslog_* variables and write access to clear_seq. */ - static DEFINE_MUTEX(syslog_lock); - -+/* -+ * Specifies if a legacy console is registered. If legacy consoles are -+ * present, it is necessary to perform the console_lock/console_unlock dance -+ * whenever console flushing should occur. -+ */ -+bool have_legacy_console; ++void nbcon_legacy_kthread_create(void) ++{ ++ struct task_struct *kt; + -+/* -+ * Specifies if an nbcon console is registered. If nbcon consoles are present, -+ * synchronous printing of legacy consoles will not occur during panic until -+ * the backtrace has been stored to the ringbuffer. 
-+ */ -+bool have_nbcon_console; ++ lockdep_assert_held(&console_mutex); + -+/* -+ * Specifies if a boot console is registered. If boot consoles are present, -+ * nbcon consoles cannot print simultaneously and must be synchronized by -+ * the console lock. This is because boot consoles and nbcon consoles may -+ * have mapped the same hardware. -+ */ -+bool have_boot_console; ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ return; + - #ifdef CONFIG_PRINTK - DECLARE_WAIT_QUEUE_HEAD(log_wait); ++ if (!printk_threads_enabled || nbcon_legacy_kthread) ++ return; + -+static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); ++ kt = kthread_run(nbcon_legacy_kthread_func, NULL, "pr/legacy"); ++ if (IS_ERR(kt)) { ++ pr_err("unable to start legacy printing thread\n"); ++ return; ++ } + - /* All 3 protected by @syslog_lock. */ - /* the next printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; -@@ -494,7 +538,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, - - static struct printk_ringbuffer printk_rb_dynamic; - --static struct printk_ringbuffer *prb = &printk_rb_static; -+struct printk_ringbuffer *prb = &printk_rb_static; ++ nbcon_legacy_kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(nbcon_legacy_kthread, -20); ++} ++#endif /* CONFIG_PRINTK */ ++ + static int __read_mostly keep_bootcon; - /* - * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before -@@ -698,9 +742,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, - return len; - } + static int __init keep_bootcon_setup(char *str) +@@ -3663,6 +3801,7 @@ void register_console(struct console *newcon) + nbcon_init(newcon); + } else { + have_legacy_console = true; ++ nbcon_legacy_kthread_create(); + } --static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -- bool is_extended, bool may_supress); -- - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { - atomic64_t seq; -@@ -1848,7 +1889,7 @@ static bool console_waiter; - * there may be a waiter spinning (like a spinlock). Also it must be - * ready to hand over the lock at the end of the section. - */ --static void console_lock_spinning_enable(void) -+void console_lock_spinning_enable(void) - { - /* - * Do not use spinning in panic(). The panic CPU wants to keep the lock. -@@ -1887,7 +1928,7 @@ static void console_lock_spinning_enable(void) - * - * Return: 1 if the lock rights were passed, 0 otherwise. - */ --static int console_lock_spinning_disable_and_check(int cookie) -+int console_lock_spinning_disable_and_check(int cookie) - { - int waiter; + if (newcon->flags & CON_BOOT) +@@ -3801,6 +3940,13 @@ static int unregister_console_locked(struct console *console) + nbcon_kthread_create(c); + } -@@ -2298,54 +2339,123 @@ int vprintk_store(int facility, int level, - return ret; ++#ifdef CONFIG_PRINTK ++ if (!printing_via_unlock && nbcon_legacy_kthread) { ++ kthread_stop(nbcon_legacy_kthread); ++ nbcon_legacy_kthread = NULL; ++ } ++#endif ++ + return res; } -+static bool legacy_allow_panic_sync; -+ -+/* -+ * This acts as a one-way switch to allow legacy consoles to print from -+ * the printk() caller context on a panic CPU. 
-+ */ -+void printk_legacy_allow_panic_sync(void) -+{ -+ legacy_allow_panic_sync = true; -+} -+ - asmlinkage int vprintk_emit(int facility, int level, - const struct dev_printk_info *dev_info, - const char *fmt, va_list args) - { -+ bool do_trylock_unlock = printing_via_unlock && -+ !IS_ENABLED(CONFIG_PREEMPT_RT); - int printed_len; -- bool in_sched = false; +@@ -3960,8 +4106,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre - /* Suppress unimportant messages after panic happens */ - if (unlikely(suppress_printk)) - return 0; + seq = prb_next_reserve_seq(prb); -- if (unlikely(suppress_panic_printk) && other_cpu_in_panic()) +- /* Flush the consoles so that records up to @seq are printed. */ +- if (printing_via_unlock) { + /* -+ * The messages on the panic CPU are the most important. If -+ * non-panic CPUs are generating any messages, they will be -+ * silently dropped. ++ * Flush the consoles so that records up to @seq are printed. ++ * Otherwise this function will just wait for the threaded printers ++ * to print up to @seq. + */ -+ if (other_cpu_in_panic()) - return 0; - - if (level == LOGLEVEL_SCHED) { - level = LOGLEVEL_DEFAULT; -- in_sched = true; -+ /* If called from the scheduler, we can not call up(). */ -+ do_trylock_unlock = false; ++ if (printing_via_unlock && !IS_ENABLED(CONFIG_PREEMPT_RT)) { + console_lock(); + console_unlock(); } +@@ -4069,9 +4219,16 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) + int pending = this_cpu_xchg(printk_pending, 0); - printk_delay(level); - - printed_len = vprintk_store(facility, level, dev_info, fmt, args); - -- /* If called from the scheduler, we can not call up(). */ -- if (!in_sched) { -+ if (!have_boot_console && have_nbcon_console) { -+ bool is_panic_context = this_cpu_in_panic(); -+ -+ /* -+ * In panic, the legacy consoles are not allowed to print from -+ * the printk calling context unless explicitly allowed. This -+ * gives the safe nbcon consoles a chance to print out all the -+ * panic messages first. This restriction only applies if -+ * there are nbcon consoles registered. -+ */ -+ if (is_panic_context) -+ do_trylock_unlock &= legacy_allow_panic_sync; -+ -+ /* -+ * There are situations where nbcon atomic printing should -+ * happen in the printk() caller context: -+ * -+ * - When this CPU is in panic. -+ * -+ * - When booting, before the printing threads have been -+ * started. -+ * -+ * - During shutdown, since the printing threads may not get -+ * a chance to print the final messages. -+ * -+ * Note that if boot consoles are registered, the -+ * console_lock/console_unlock dance must be relied upon -+ * instead because nbcon consoles cannot print simultaneously -+ * with boot consoles. -+ */ -+ if (is_panic_context || -+ !printk_threads_enabled || -+ (system_state > SYSTEM_RUNNING)) { -+ nbcon_atomic_flush_all(); -+ } -+ } -+ -+ nbcon_wake_threads(); -+ -+ if (do_trylock_unlock) { - /* - * The caller may be holding system-critical or - * timing-sensitive locks. Disable preemption during - * printing of all remaining records to all consoles so that - * this context can return as soon as possible. Hopefully - * another printk() caller will take over the printing. -+ * -+ * Also, nbcon_get_default_prio() requires migration disabled. - */ - preempt_disable(); -+ - /* -- * Try to acquire and then immediately release the console -- * semaphore. The release will print out buffers. 
With the -- * spinning variant, this context tries to take over the -- * printing from another printing context. -+ * Do not emit for EMERGENCY priority. The console will be -+ * explicitly flushed when exiting the emergency section. - */ -- if (console_trylock_spinning()) + if (pending & PRINTK_PENDING_OUTPUT) { +- /* If trylock fails, someone else is doing the printing */ +- if (console_trylock()) - console_unlock(); -+ if (nbcon_get_default_prio() == NBCON_PRIO_EMERGENCY) { -+ do_trylock_unlock = false; ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ wake_up_interruptible(&legacy_wait); + } else { + /* -+ * Try to acquire and then immediately release the -+ * console semaphore. The release will print out -+ * buffers. With the spinning variant, this context -+ * tries to take over the printing from another -+ * printing context. ++ * If trylock fails, some other context ++ * will do the printing. + */ -+ if (console_trylock_spinning()) ++ if (console_trylock()) + console_unlock(); + } + } + + if (pending & PRINTK_PENDING_WAKEUP) +-- +2.51.0 + +From 05488d23c5e6a8ac3da1070d67ecf480bdf7a18b Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Mon, 11 Dec 2023 09:34:16 +0000 +Subject: [PATCH 155/213] printk: Avoid false positive lockdep report for + legacy driver. + +printk may invoke the legacy console driver from atomic context. This leads to +a lockdep splat because the console driver will acquire a sleeping lock and the +caller may also hold a spinning lock. This is noticed by lockdep on !PREEMPT_RT +configurations because it will also lead to a problem on PREEMPT_RT. + +On PREEMPT_RT the atomic path is always avoided and the console driver is +always invoked from a dedicated thread. Thus the lockdep splat is a false +positive. + +Override the lock-context before invoking the console driver. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index aaafaa15f534..98495c96a3fa 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2823,6 +2823,8 @@ static void __console_unlock(void) + up_console_sem(); + } + ++static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); + - preempt_enable(); + #ifdef CONFIG_PRINTK + + /* +@@ -2991,7 +2993,7 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co + /* + * On PREEMPT_RT this function is either in a thread or + * panic context. So there is no need for concern about +- * printk reentrance or handovers. ++ * printk reentrance, handovers, or lockdep complaints. + */ + + con->write(con, outbuf, pmsg.outbuf_len); +@@ -3013,7 +3015,9 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co + /* Do not trace print latency. 
*/
+ stop_critical_timings();
+
++ lock_map_acquire_try(&printk_legacy_map);
+ con->write(con, outbuf, pmsg.outbuf_len);
++ lock_map_release(&printk_legacy_map);
+
+ start_critical_timings();
+
+@@ -3090,7 +3094,10 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
+ any_usable = true;
+
+ if (flags & CON_NBCON) {
++
++ lock_map_acquire_try(&printk_legacy_map);
+ progress = nbcon_atomic_emit_next_record(con, handover, cookie);
++ lock_map_release(&printk_legacy_map);
+
+ printk_seq = nbcon_seq_read(con);
+ } else {
+--
+2.51.0
+
+From 98cc7470ba429cbc03af321c31ed6d7ea5dbb83b Mon Sep 17 00:00:00 2001
+From: Mike Galbraith
+Date: Sat, 27 Feb 2016 08:09:11 +0100
+Subject: [PATCH 156/213] drm/i915: Use preempt_disable/enable_rt() where
+ recommended

+Mario Kleiner points out in commit
+ ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.")
+
+spots where preemption should be disabled on PREEMPT_RT. The
+difference is that on PREEMPT_RT the intel_uncore::lock disables neither
+preemption nor interrupts and so the region remains preemptible.
+
+The area covers only register reads and writes. The part that worries me
+is:
+- __intel_get_crtc_scanline() the worst case is 100us if no match is
+ found.
+
+- intel_crtc_scanlines_since_frame_timestamp() not sure how long this
+ may take in the worst case.
+
+It was in the RT queue for a while and nobody complained.
+Disable preemption on PREEMPT_RT during timestamping.
+
+[bigeasy: patch description.]
+
+Cc: Mario Kleiner
+Signed-off-by: Mike Galbraith
+Signed-off-by: Thomas Gleixner
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/i915/display/intel_vblank.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
+index f5659ebd08eb..5b6d2f55528d 100644
+--- a/drivers/gpu/drm/i915/display/intel_vblank.c
++++ b/drivers/gpu/drm/i915/display/intel_vblank.c
+@@ -294,7 +294,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ */
+ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_disable();
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+@@ -358,7 +359,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ if (etime)
+ *etime = ktime_get();
+
+- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_enable();
+
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+--
+2.51.0
+
+From d90486fd0b182e7d665b69db2c32a866d5e49e6d Mon Sep 17 00:00:00 2001
+From: Mike Galbraith
+Date: Sat, 27 Feb 2016 09:01:42 +0100
+Subject: [PATCH 157/213] drm/i915: Don't disable interrupts on PREEMPT_RT
+ during atomic updates
+
+Commit
+ 8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
+
+started disabling interrupts across atomic updates. This breaks on PREEMPT_RT
+because within this section the code attempts to acquire spinlock_t locks which
+are sleeping locks on PREEMPT_RT.
+
+According to the comment the interrupts are disabled to avoid random delays and
+are not required for protection or synchronisation.
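+
+[ed: an illustrative sketch, not code from the driver: the shape of the
+ problem is an interrupts-off region that then takes a spinlock_t,
+ which PREEMPT_RT turns into a sleeping lock:
+
+	static void rt_unsafe_section(spinlock_t *lock)
+	{
+		local_irq_disable();
+		spin_lock(lock);	/* sleeps on PREEMPT_RT: invalid with IRQs off */
+		spin_unlock(lock);
+		local_irq_enable();
+	}
+]
+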
+If this needs to happen with disabled interrupts on PREEMPT_RT, and the
+whole section is restricted to register access, then all sleeping locks
+need to be acquired before interrupts are disabled and some functions
+may be moved after enabling interrupts again.
+This includes:
+- prepare_to_wait() + finish_wait() due to its wake queue.
+- drm_crtc_vblank_put() -> vblank_disable_fn() drm_device::vbl_lock.
+- skl_pfit_enable(), intel_update_plane(), vlv_atomic_update_fifo() and
+ maybe others due to intel_uncore::lock
+- drm_crtc_arm_vblank_event() due to drm_device::event_lock and
+ drm_device::vblank_time_lock.
+
+Don't disable interrupts on PREEMPT_RT during atomic updates.
+
+[bigeasy: drop local locks, commit message]
+
+Signed-off-by: Mike Galbraith
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/i915/display/intel_crtc.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
+index cfbfbfed3f5e..da2becfbc86c 100644
+--- a/drivers/gpu/drm/i915/display/intel_crtc.c
++++ b/drivers/gpu/drm/i915/display/intel_crtc.c
+@@ -562,7 +562,8 @@ void intel_pipe_update_start(struct intel_atomic_state *state,
+ */
+ intel_psr_wait_for_idle_locked(new_crtc_state);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+
+ crtc->debug.min_vbl = min;
+ crtc->debug.max_vbl = max;
+@@ -587,11 +588,13 @@ void intel_pipe_update_start(struct intel_atomic_state *state,
+ break;
+ }
+
+- local_irq_enable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_enable();
+
+ timeout = schedule_timeout(timeout);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
 }
- static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } - static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } + if (intel_vgpu_active(dev_priv)) + return; +-- +2.51.0 + +From ecbb593034899abc33dceecedf5c8a5e7d1449c3 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 25 Oct 2021 15:05:18 +0200 +Subject: [PATCH 158/213] drm/i915: Don't check for atomic context on + PREEMPT_RT + +The !in_atomic() check in _wait_for_atomic() triggers on PREEMPT_RT +because the uncore::lock is a spinlock_t and does not disable +preemption or interrupts. + +Changing the uncore:lock to a raw_spinlock_t doubles the worst case +latency on an otherwise idle testbox during testing. Therefore I'm +currently unsure about changing this. + +Link: https://lore.kernel.org/all/20211006164628.s2mtsdd2jdbfyf7g@linutronix.de/ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/i915/i915_utils.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h +index c61066498bf2..48e19e55d6b0 100644 +--- a/drivers/gpu/drm/i915/i915_utils.h ++++ b/drivers/gpu/drm/i915/i915_utils.h +@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) + #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) -+static inline void nbcon_legacy_kthread_create(void) { } -+static inline void wake_up_legacy_kthread(void) { } - #endif /* CONFIG_PRINTK */ + /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ +-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) ++#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) + #else + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +-- +2.51.0 + +From 40309c158a5850829f13b0d0ffb68a0b8cd478e4 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 6 Dec 2018 09:52:20 +0100 +Subject: [PATCH 159/213] drm/i915: Disable tracing points on PREEMPT_RT + +Luca Abeni reported this: +| BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 +| CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10 +| Call Trace: +| rt_spin_lock+0x3f/0x50 +| gen6_read32+0x45/0x1d0 [i915] +| g4x_get_vblank_counter+0x36/0x40 [i915] +| trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915] + +The tracing events use trace_i915_pipe_update_start() among other events +use functions acquire spinlock_t locks which are transformed into +sleeping locks on PREEMPT_RT. A few trace points use +intel_get_crtc_scanline(), others use ->get_vblank_counter() wich also +might acquire a sleeping locks on PREEMPT_RT. +At the time the arguments are evaluated within trace point, preemption +is disabled and so the locks must not be acquired on PREEMPT_RT. + +Based on this I don't see any other way than disable trace points on +PREMPT_RT. 
+
+Reported-by: Luca Abeni
+Cc: Steven Rostedt
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/i915/i915_trace.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index ce1cbee1b39d..c54653cf72c9 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -6,6 +6,10 @@
+ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+ #define _I915_TRACE_H_
+
++#ifdef CONFIG_PREEMPT_RT
++#define NOTRACE
++#endif
++
+ #include
+ #include
+ #include
+--
+2.51.0
+
+From d6583c7bf2ed3758e3a3e53cb1cb3fb6dd4dae04 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Wed, 19 Dec 2018 10:47:02 +0100
+Subject: [PATCH 160/213] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with
+ NOTRACE
+
+The order of the header files is important. If this header file is
+included after tracepoint.h was included then the NOTRACE here becomes a
+nop. Currently this happens for two .c files which use the tracepoints
+behind DRM_I915_LOW_LEVEL_TRACEPOINTS.
+
+Cc: Steven Rostedt
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Thomas Gleixner
+---
+ drivers/gpu/drm/i915/i915_trace.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index c54653cf72c9..3c51620d011b 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -326,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
+ TP_ARGS(rq)
+ );
+
+-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
+ DEFINE_EVENT(i915_request, i915_request_guc_submit,
+ TP_PROTO(struct i915_request *rq),
+ TP_ARGS(rq)
+--
+2.51.0
+
+From 71b8be9c5490e21fc0a6a7cd8a0b3e8a7f79a8d7 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Wed, 8 Sep 2021 17:18:00 +0200
+Subject: [PATCH 161/213] drm/i915/gt: Queue and wait for the irq_work item.
+
+Disabling interrupts and invoking the irq_work function directly breaks
+on PREEMPT_RT.
+PREEMPT_RT does not invoke all irq_work from hardirq context because
+some of the users have spinlock_t locking in the callback function.
+These locks are then turned into sleeping locks which cannot be
+acquired with disabled interrupts.
+
+Using irq_work_queue() has the benefit that the irq_work will be invoked
+in the regular context. In general there is "no" delay between enqueuing
+the callback and its invocation because the interrupt is raised right
+away on architectures which support it (which includes x86).
+
+Use irq_work_queue() + irq_work_sync() instead of invoking the callback
+directly.
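+
+[ed: a minimal sketch of the resulting pattern, using a hypothetical
+ irq_work item "work"; the callback is no longer invoked by hand with
+ interrupts disabled, it is queued and then waited for:
+
+	static void kick_and_wait(struct irq_work *work)
+	{
+		irq_work_queue(work);	/* raise the irq_work interrupt */
+		irq_work_sync(work);	/* wait until the callback has run */
+	}
+]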
+
+Reported-by: Clark Williams
+Signed-off-by: Sebastian Andrzej Siewior
+Reviewed-by: Maarten Lankhorst
+---
+ drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+index f2973cd1a8aa..aa77f8601b8a 100644
+--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+@@ -315,7 +315,12 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
+ return;
+
+ /* Kick the work once more to drain the signalers, and disarm the irq */
+- irq_work_queue(&b->irq_work);
++ irq_work_sync(&b->irq_work);
++ while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
++ irq_work_queue(&b->irq_work);
++ cond_resched();
++ irq_work_sync(&b->irq_work);
+ }
 }
 
 void intel_breadcrumbs_free(struct kref *kref)
+--
+2.51.0
+
+From a7269941c2d2388e7e7798e87f26049887b0f3ba Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Wed, 8 Sep 2021 19:03:41 +0200
+Subject: [PATCH 162/213] drm/i915/gt: Use spin_lock_irq() instead of
+ local_irq_disable() + spin_lock()
+
+execlists_dequeue() is invoked from a function which uses
+local_irq_disable() to disable interrupts so the spin_lock() behaves
+like spin_lock_irq().
+This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not
+the same as spin_lock_irq().
+
+execlists_dequeue_irq() and execlists_dequeue() each have only one
+caller. If intel_engine_cs::active::lock is acquired and released with the
+_irq suffix then it behaves almost as if execlists_dequeue() would be
+invoked with disabled interrupts. The difference is the last part of the
+function which is then invoked with enabled interrupts.
+I can't tell if this makes a difference. From looking at it, it might
+work to move the last unlock at the end of the function as I didn't find
+anything that would acquire the lock again.
+
+Reported-by: Clark Williams
+Signed-off-by: Sebastian Andrzej Siewior
+Reviewed-by: Maarten Lankhorst
+---
+ .../drm/i915/gt/intel_execlists_submission.c | 17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+index 2065be5a196b..73d815fc514b 100644
+--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ * and context switches) submission.
+ */ --/* -- * Return true if a panic is in progress on a remote CPU. -- * -- * On true, the local CPU should immediately release any printing resources -- * that may be needed by the panic CPU. -- */ --bool other_cpu_in_panic(void) --{ -- if (!panic_in_progress()) -- return false; -- -- /* -- * We can use raw_smp_processor_id() here because it is impossible for -- * the task to be migrated to the panic_cpu, or away from it. If -- * panic_cpu has already been set, and we're not currently executing on -- * that CPU, then we never will be. -- */ -- return atomic_read(&panic_cpu) != raw_smp_processor_id(); --} -- - /** - * console_lock - block the console subsystem from printing - * -@@ -2716,42 +2817,16 @@ int is_console_locked(void) - } - EXPORT_SYMBOL(is_console_locked); +- spin_lock(&sched_engine->lock); ++ spin_lock_irq(&sched_engine->lock); --/* -- * Check if the given console is currently capable and allowed to print -- * records. -- * -- * Requires the console_srcu_read_lock. -- */ --static inline bool console_is_usable(struct console *con) --{ -- short flags = console_srcu_read_flags(con); -- -- if (!(flags & CON_ENABLED)) -- return false; -- -- if ((flags & CON_SUSPENDED)) -- return false; -- -- if (!con->write) -- return false; -- -- /* -- * Console drivers may assume that per-cpu resources have been -- * allocated. So unless they're explicitly marked as being able to -- * cope (CON_ANYTIME) don't call them until this CPU is officially up. -- */ -- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) -- return false; -- -- return true; + /* + * If the queue is higher priority than the last +@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + return; + } + } +@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.sched_engine->lock); +- spin_unlock(&engine->sched_engine->lock); ++ spin_unlock_irq(&engine->sched_engine->lock); + return; /* leave this for another sibling */ + } + +@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + */ + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + + /* + * We can skip poking the HW if we ended up with exactly the same set +@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + } + } + +-static void execlists_dequeue_irq(struct intel_engine_cs *engine) +-{ +- local_irq_disable(); /* Suspend interrupts across request submission */ +- execlists_dequeue(engine); +- local_irq_enable(); /* flush irq_work (e.g. 
breadcrumb enabling) */ -} - - static void __console_unlock(void) + static void clear_ports(struct i915_request **ports, int count) { - console_locked = 0; - up_console_sem(); + memset_p((void **)ports, NULL, count); +@@ -2478,7 +2471,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) + } + + if (!engine->execlists.pending[0]) { +- execlists_dequeue_irq(engine); ++ execlists_dequeue(engine); + start_timeslice(engine); + } + +-- +2.51.0 + +From 5ac30ab01c4502834914dc18257fb74a71098bb0 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 1 Oct 2021 20:01:03 +0200 +Subject: [PATCH 163/213] drm/i915: Drop the irqs_disabled() check + +The !irqs_disabled() check triggers on PREEMPT_RT even with +i915_sched_engine::lock acquired. The reason is the lock is transformed +into a sleeping lock on PREEMPT_RT and does not disable interrupts. + +There is no need to check for disabled interrupts. The lockdep +annotation below already check if the lock has been acquired by the +caller and will yell if the interrupts are not disabled. + +Remove the !irqs_disabled() check. + +Reported-by: Maarten Lankhorst +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/i915/i915_request.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +index f59081066a19..014d02029a41 100644 +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -609,7 +609,6 @@ bool __i915_request_submit(struct i915_request *request) + + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +@@ -718,7 +717,6 @@ void __i915_request_unsubmit(struct i915_request *request) + */ + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +-- +2.51.0 + +From 9cdbf162c40ca0920376ada1f2d2ba69d8426ec0 Mon Sep 17 00:00:00 2001 +From: Tvrtko Ursulin +Date: Wed, 5 Jul 2023 10:30:25 +0100 +Subject: [PATCH 164/213] drm/i915: Do not disable preemption for resets + +Commit ade8a0f59844 ("drm/i915: Make all GPU resets atomic") added a +preempt disable section over the hardware reset callback to prepare the +driver for being able to reset from atomic contexts. + +In retrospect I can see that the work item at a time was about removing +the struct mutex from the reset path. Code base also briefly entertained +the idea of doing the reset under stop_machine in order to serialize +userspace mmap and temporary glitch in the fence registers (see +eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex"), +but that never materialized and was soon removed in 2caffbf11762 +("drm/i915: Revoke mmaps and prevent access to fence registers across +reset") and replaced with a SRCU based solution. + +As such, as far as I can see, today we still have a requirement that +resets must not sleep (invoked from submission tasklets), but no need to +support invoking them from a truly atomic context. + +Given that the preemption section is problematic on RT kernels, since the +uncore lock becomes a sleeping lock and so is invalid in such section, +lets try and remove it. Potential downside is that our short waits on GPU +to complete the reset may get extended if CPU scheduling interferes, but +in practice that probably isn't a deal breaker. 
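+
+[ed: schematically, using the names from the hunk below, the change is
+ from
+
+	preempt_disable();
+	ret = reset(gt, reset_mask, retry);
+	preempt_enable();
+
+ to a plain
+
+	ret = reset(gt, reset_mask, retry);
+
+ so the reset callback may now be preempted while it polls the GPU.]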
+ +In terms of mechanics, since the preemption disabled block is being +removed we just need to replace a few of the wait_for_atomic macros into +busy looping versions which will work (and not complain) when called from +non-atomic sections. + +Signed-off-by: Tvrtko Ursulin +Cc: Chris Wilson +Cc: Paul Gortmaker +Cc: Sebastian Andrzej Siewior +Acked-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/r/20230705093025.3689748-1-tvrtko.ursulin@linux.intel.com +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/i915/gt/intel_reset.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c +index 13fb8e5042c5..b51fb0c97772 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.c ++++ b/drivers/gpu/drm/i915/gt/intel_reset.c +@@ -164,13 +164,13 @@ static int i915_do_reset(struct intel_gt *gt, + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + udelay(50); +- err = wait_for_atomic(i915_in_reset(pdev), 50); ++ err = _wait_for_atomic(i915_in_reset(pdev), 50, 0); + + /* Clear the reset request. */ + pci_write_config_byte(pdev, I915_GDRST, 0); + udelay(50); + if (!err) +- err = wait_for_atomic(!i915_in_reset(pdev), 50); ++ err = _wait_for_atomic(!i915_in_reset(pdev), 50, 0); + + return err; } +@@ -190,7 +190,7 @@ static int g33_do_reset(struct intel_gt *gt, + struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); -+static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); -+ -+#ifdef CONFIG_PRINTK -+ - /* - * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This - * is achieved by shifting the existing message over and inserting the dropped -@@ -2766,8 +2841,7 @@ static void __console_unlock(void) - * - * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. - */ --#ifdef CONFIG_PRINTK --static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) -+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) - { - struct printk_buffers *pbufs = pmsg->pbufs; - const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); -@@ -2798,9 +2872,6 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d - memcpy(outbuf, scratchbuf, len); - pmsg->outbuf_len += len; + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); +- return wait_for_atomic(g4x_reset_complete(pdev), 50); ++ return _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); } --#else --#define console_prepend_dropped(pmsg, dropped) --#endif /* CONFIG_PRINTK */ - /* - * Read and format the specified record (or a later record if the specified -@@ -2821,11 +2892,9 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d - * of @pmsg are valid. (See the documentation of struct printk_message - * for information about the @pmsg fields.) 
- */ --static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -- bool is_extended, bool may_suppress) -+bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -+ bool is_extended, bool may_suppress) - { -- static int panic_console_dropped; -- - struct printk_buffers *pbufs = pmsg->pbufs; - const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); - const size_t outbuf_sz = sizeof(pbufs->outbuf); -@@ -2853,17 +2922,6 @@ static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - pmsg->seq = r.info->seq; - pmsg->dropped = r.info->seq - seq; + static int g4x_do_reset(struct intel_gt *gt, +@@ -207,7 +207,7 @@ static int g4x_do_reset(struct intel_gt *gt, -- /* -- * Check for dropped messages in panic here so that printk -- * suppression can occur as early as possible if necessary. -- */ -- if (pmsg->dropped && -- panic_in_progress() && -- panic_console_dropped++ > 10) { -- suppress_panic_printk = 1; -- pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); -- } -- - /* Skip record that has level above the console loglevel. */ - if (may_suppress && suppress_message_printing(r.info->level)) + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_MEDIA | GRDOM_RESET_ENABLE); +- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ++ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); + if (ret) { + GT_TRACE(gt, "Wait for media reset failed\n"); goto out; -@@ -2880,6 +2938,13 @@ static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - return true; - } +@@ -215,7 +215,7 @@ static int g4x_do_reset(struct intel_gt *gt, -+/* -+ * Used as the printk buffers for non-panic, serialized console printing. -+ * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. -+ * Its usage requires the console_lock held. -+ */ -+struct printk_buffers printk_shared_pbufs; -+ - /* - * Print one record for the given console. The record printed is whatever - * record is the next available record for the given console. -@@ -2897,12 +2962,10 @@ static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - */ - static bool console_emit_next_record(struct console *con, bool *handover, int cookie) - { -- static struct printk_buffers pbufs; -- - bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; -- char *outbuf = &pbufs.outbuf[0]; -+ char *outbuf = &printk_shared_pbufs.outbuf[0]; - struct printk_message pmsg = { -- .pbufs = &pbufs, -+ .pbufs = &printk_shared_pbufs, - }; - unsigned long flags; + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); +- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ++ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); + if (ret) { + GT_TRACE(gt, "Wait for render reset failed\n"); + goto out; +@@ -785,9 +785,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) + reset_mask = wa_14015076503_start(gt, engine_mask, !retry); -@@ -2924,35 +2987,59 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co - con->dropped = 0; - } + GT_TRACE(gt, "engine_mask=%x\n", reset_mask); +- preempt_disable(); + ret = reset(gt, reset_mask, retry); +- preempt_enable(); -- /* -- * While actively printing out messages, if another printk() -- * were to occur on another CPU, it may wait for this one to -- * finish. This task can not be preempted if there is a -- * waiter waiting to take over. 
-- * -- * Interrupts are disabled because the hand over to a waiter -- * must not be interrupted until the hand over is completed -- * (@console_waiter is cleared). -- */ -- printk_safe_enter_irqsave(flags); -- console_lock_spinning_enable(); -+ /* Write everything out to the hardware. */ + wa_14015076503_end(gt, reset_mask); + } +-- +2.51.0 + +From 3d457657b1f476e8f1ca8b51438c2bea965ecaf3 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Tue, 3 Oct 2023 21:37:21 +0200 +Subject: [PATCH 165/213] drm/i915/guc: Consider also RCU depth in busy loop. + +intel_guc_send_busy_loop() looks at in_atomic() and irqs_disabled() to +decide if it should busy-spin while waiting or if it may sleep. +Both checks will report false on PREEMPT_RT if sleeping spinlocks are +acquired leading to RCU splats while the function sleeps. + +Check also if RCU has been disabled. + +Reported-by: "John B. Wyatt IV" +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h +index 8dc291ff0093..5b8d084c9c58 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h +@@ -317,7 +317,7 @@ static inline int intel_guc_send_busy_loop(struct intel_guc *guc, + { + int err; + unsigned int sleep_period_ms = 1; +- bool not_atomic = !in_atomic() && !irqs_disabled(); ++ bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth(); -- /* Do not trace print latency. */ -- stop_critical_timings(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ /* -+ * On PREEMPT_RT this function is either in a thread or -+ * panic context. So there is no need for concern about -+ * printk reentrance, handovers, or lockdep complaints. -+ */ + /* + * FIXME: Have caller pass in if we are in an atomic context to avoid +-- +2.51.0 + +From 03e9767e39a3dc13f31b3b1d9d55876eb5d7164e Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 21 Feb 2022 17:59:14 +0100 +Subject: [PATCH 166/213] Revert "drm/i915: Depend on !PREEMPT_RT." + +Once the known issues are addressed, it should be safe to enable the +driver. + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/gpu/drm/i915/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig +index ce397a8797f7..98c3f532822d 100644 +--- a/drivers/gpu/drm/i915/Kconfig ++++ b/drivers/gpu/drm/i915/Kconfig +@@ -3,7 +3,6 @@ config DRM_I915 + tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" + depends on DRM + depends on X86 && PCI +- depends on !PREEMPT_RT + select INTEL_GTT if X86 + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular +-- +2.51.0 + +From 8a9ba4130735860473a3c0a76340a64e0f2c6d85 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 23 Sep 2023 03:11:05 +0200 +Subject: [PATCH 167/213] sched: define TIF_ALLOW_RESCHED + +On Fri, Sep 22 2023 at 00:55, Thomas Gleixner wrote: +> On Thu, Sep 21 2023 at 09:00, Linus Torvalds wrote: +>> That said - I think as a proof of concept and "look, with this we get +>> the expected scheduling event counts", that patch is perfect. I think +>> you more than proved the concept. +> +> There is certainly quite some analyis work to do to make this a one to +> one replacement. 
+
+That 20% was a tired pilot error. The real number is in the 5% ballpark.
+
+> I have no clue what's going on there yet, but that shows that there is
+> obviously quite some work ahead to get this sorted.
+
+It took some head scratching to figure that out. The initial fix broke
+the handling of the hog issue, i.e. the problem that Ankur tried to
+solve, but I hacked up a "solution" for that too.
+
+With that the memtier benchmark is roughly back to the mainline numbers,
+but my throughput benchmark know-how is pretty close to zero, so that
+should be looked at by people who actually understand these things.
+
+Likewise the hog prevention is just at the PoC level and clearly beyond
+my knowledge of scheduler details: It unconditionally forces a
+reschedule when the looping task is not responding to a lazy reschedule
+request before the next tick. IOW it forces a reschedule on the second
+tick, which is obviously different from the cond_resched()/might_sleep()
+behaviour.
+
+The changes vs. the original PoC aside from the bug and thinko fixes:
+
+ 1) A hack to utilize the TRACE_FLAG_IRQS_NOSUPPORT flag to trace the
+ lazy preempt bit as the trace_entry::flags field is full already.
+
+ That obviously breaks the tracer ABI, but if we go there then
+ this needs to be fixed. Steven?
+
+ 2) debugfs file to validate that loops can be force preempted w/o
+ cond_resched()
+
+ The usage is:
+
+ # taskset -c 1 bash
+ # echo 1 > /sys/kernel/debug/sched/hog &
+ # echo 1 > /sys/kernel/debug/sched/hog &
+ # echo 1 > /sys/kernel/debug/sched/hog &
+
+ top shows ~33% CPU for each of the hogs and tracing confirms that
+ the crude hack in the scheduler tick works:
+
+ bash-4559 [001] dlh2. 2253.331202: resched_curr <-__update_curr
+ bash-4560 [001] dlh2. 2253.340199: resched_curr <-__update_curr
+ bash-4561 [001] dlh2. 2253.346199: resched_curr <-__update_curr
+ bash-4559 [001] dlh2. 2253.353199: resched_curr <-__update_curr
+ bash-4561 [001] dlh2. 2253.358199: resched_curr <-__update_curr
+ bash-4560 [001] dlh2. 2253.370202: resched_curr <-__update_curr
+ bash-4559 [001] dlh2. 2253.378198: resched_curr <-__update_curr
+ bash-4561 [001] dlh2. 2253.389199: resched_curr <-__update_curr
+
+ The 'l' instead of the usual 'N' reflects that the lazy resched
+ bit is set. That makes __update_curr() invoke resched_curr()
+ instead of the lazy variant. resched_curr() sets TIF_NEED_RESCHED
+ and folds it into preempt_count so that preemption happens at the
+ next possible point, i.e. either in return from interrupt or at
+ the next preempt_enable().
+
+That's as much as I wanted to demonstrate and I'm not going to spend
+more cycles on it as I have already too many other things in flight and
+the resulting scheduler woes are clearly outside of my expertise.
+
+Though definitely I'm putting a permanent NAK in place for any attempts
+to duct tape the preempt=NONE model any further by sprinkling more
+cond*() and whatever warts around.
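+
+[ed: the net effect on the common polling helper, as implemented in the
+ sched.h hunk below: either flag now makes need_resched() report true:
+
+	static __always_inline bool need_resched(void)
+	{
+		return unlikely(tif_need_resched_lazy() || tif_need_resched());
+	}
+]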
+ +Thanks, + + tglx + +[tglx: s@CONFIG_PREEMPT_AUTO@CONFIG_PREEMPT_BUILD_AUTO@ ] + +Link: https://lore.kernel.org/all/87jzshhexi.ffs@tglx/ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/x86/Kconfig | 1 + + arch/x86/include/asm/thread_info.h | 6 ++-- + drivers/acpi/processor_idle.c | 2 +- + include/linux/entry-common.h | 2 +- + include/linux/entry-kvm.h | 2 +- + include/linux/sched.h | 12 ++++--- + include/linux/sched/idle.h | 8 ++--- + include/linux/thread_info.h | 24 ++++++++++++++ + include/linux/trace_events.h | 8 ++--- + kernel/Kconfig.preempt | 17 +++++++++- + kernel/entry/common.c | 4 +-- + kernel/entry/kvm.c | 2 +- + kernel/sched/core.c | 50 +++++++++++++++++++++--------- + kernel/sched/debug.c | 19 ++++++++++++ + kernel/sched/fair.c | 46 ++++++++++++++++++--------- + kernel/sched/features.h | 2 ++ + kernel/sched/idle.c | 3 +- + kernel/sched/sched.h | 1 + + kernel/trace/trace.c | 2 ++ + kernel/trace/trace_output.c | 16 ++++++++-- + 20 files changed, 171 insertions(+), 56 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 14e47444817a..07833bae543f 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -273,6 +273,7 @@ config X86 + select HAVE_STATIC_CALL + select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL + select HAVE_PREEMPT_DYNAMIC_CALL ++ select HAVE_PREEMPT_AUTO + select HAVE_RSEQ + select HAVE_RUST if X86_64 + select HAVE_SYSCALL_TRACEPOINTS +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index d63b02940747..1ff38ebbd588 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -81,8 +81,9 @@ struct thread_info { + #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ + #define TIF_SIGPENDING 2 /* signal pending */ + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +-#define TIF_SSBD 5 /* Speculative store bypass disable */ ++#define TIF_ARCH_RESCHED_LAZY 4 /* Lazy rescheduling */ ++#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/ ++#define TIF_SSBD 6 /* Speculative store bypass disable */ + #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ + #define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ + #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ +@@ -104,6 +105,7 @@ struct thread_info { + #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) ++#define _TIF_ARCH_RESCHED_LAZY (1 << TIF_ARCH_RESCHED_LAZY) + #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) + #define _TIF_SSBD (1 << TIF_SSBD) + #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c +index 0888e4d618d5..f3c6f73d3869 100644 +--- a/drivers/acpi/processor_idle.c ++++ b/drivers/acpi/processor_idle.c +@@ -107,7 +107,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = { + */ + static void __cpuidle acpi_safe_halt(void) + { +- if (!tif_need_resched()) { ++ if (!need_resched()) { + raw_safe_halt(); + raw_local_irq_disable(); + } +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index d95ab85f96ba..8b3ab0cc1334 100644 +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -60,7 +60,7 @@ + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED | 
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ +- ARCH_EXIT_TO_USER_MODE_WORK) ++ _TIF_NEED_RESCHED_LAZY | ARCH_EXIT_TO_USER_MODE_WORK) -- /* Write everything out to the hardware. */ -- con->write(con, outbuf, pmsg.outbuf_len); -+ con->write(con, outbuf, pmsg.outbuf_len); -+ con->seq = pmsg.seq + 1; -+ } else { -+ /* -+ * While actively printing out messages, if another printk() -+ * were to occur on another CPU, it may wait for this one to -+ * finish. This task can not be preempted if there is a -+ * waiter waiting to take over. -+ * -+ * Interrupts are disabled because the hand over to a waiter -+ * must not be interrupted until the hand over is completed -+ * (@console_waiter is cleared). -+ */ -+ printk_safe_enter_irqsave(flags); -+ console_lock_spinning_enable(); + /** + * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs +diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h +index 6813171afccb..674a622c91be 100644 +--- a/include/linux/entry-kvm.h ++++ b/include/linux/entry-kvm.h +@@ -18,7 +18,7 @@ -- start_critical_timings(); -+ /* Do not trace print latency. */ -+ stop_critical_timings(); + #define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ +- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) ++ _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED_LAZY | ARCH_XFER_TO_GUEST_MODE_WORK) -- con->seq = pmsg.seq + 1; -+ lock_map_acquire_try(&printk_legacy_map); -+ con->write(con, outbuf, pmsg.outbuf_len); -+ lock_map_release(&printk_legacy_map); + struct kvm_vcpu; -- *handover = console_lock_spinning_disable_and_check(cookie); -- printk_safe_exit_irqrestore(flags); -+ start_critical_timings(); -+ -+ con->seq = pmsg.seq + 1; -+ -+ *handover = console_lock_spinning_disable_and_check(cookie); -+ printk_safe_exit_irqrestore(flags); -+ } - skip: - return true; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 6f27f5e1effe..84343e055168 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2055,17 +2055,17 @@ static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, + update_ti_thread_flag(task_thread_info(tsk), flag, value); } -+#else -+ -+static bool console_emit_next_record(struct console *con, bool *handover, int cookie) -+{ -+ *handover = false; -+ return false; -+} -+ -+#endif /* CONFIG_PRINTK */ -+ - /* - * Print out all remaining records to all consoles. - * -@@ -2991,13 +3078,33 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove - - cookie = console_srcu_read_lock(); - for_each_console_srcu(con) { -+ short flags = console_srcu_read_flags(con); -+ u64 printk_seq; - bool progress; - -- if (!console_is_usable(con)) -+ /* -+ * console_flush_all() is only for legacy consoles, -+ * unless the nbcon console has no kthread printer. 
-+ */ -+ if ((flags & CON_NBCON) && con->kthread) -+ continue; -+ -+ if (!console_is_usable(con, flags, true)) - continue; - any_usable = true; - -- progress = console_emit_next_record(con, handover, cookie); -+ if (flags & CON_NBCON) { -+ -+ lock_map_acquire_try(&printk_legacy_map); -+ progress = nbcon_atomic_emit_next_record(con, handover, cookie); -+ lock_map_release(&printk_legacy_map); -+ -+ printk_seq = nbcon_seq_read(con); -+ } else { -+ progress = console_emit_next_record(con, handover, cookie); -+ -+ printk_seq = con->seq; -+ } +-static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) ++static inline bool test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) + { + return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); + } - /* - * If a handover has occurred, the SRCU read lock -@@ -3007,8 +3114,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove - return false; +-static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) ++static inline bool test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) + { + return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); + } - /* Track the next of the highest seq flushed. */ -- if (con->seq > *next_seq) -- *next_seq = con->seq; -+ if (printk_seq > *next_seq) -+ *next_seq = printk_seq; +-static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) ++static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag) + { + return test_ti_thread_flag(task_thread_info(tsk), flag); + } +@@ -2078,9 +2078,11 @@ static inline void set_tsk_need_resched(struct task_struct *tsk) + static inline void clear_tsk_need_resched(struct task_struct *tsk) + { + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); ++ if (IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO)) ++ clear_tsk_thread_flag(tsk, TIF_NEED_RESCHED_LAZY); + } - if (!progress) - continue; -@@ -3031,19 +3138,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove - return false; +-static inline int test_tsk_need_resched(struct task_struct *tsk) ++static inline bool test_tsk_need_resched(struct task_struct *tsk) + { + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +@@ -2261,7 +2263,7 @@ static inline int rwlock_needbreak(rwlock_t *lock) --/** -- * console_unlock - unblock the console subsystem from printing -- * -- * Releases the console_lock which the caller holds to block printing of -- * the console subsystem. -- * -- * While the console_lock was held, console output may have been buffered -- * by printk(). If this is the case, console_unlock(); emits -- * the output prior to releasing the lock. -- * -- * console_unlock(); may be called from any context. -- */ --void console_unlock(void) -+static void console_flush_and_unlock(void) + static __always_inline bool need_resched(void) { - bool do_cond_resched; - bool handover; -@@ -3087,6 +3182,32 @@ void console_unlock(void) - */ - } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); +- return unlikely(tif_need_resched()); ++ return unlikely(tif_need_resched_lazy() || tif_need_resched()); } -+ -+/** -+ * console_unlock - unblock the console subsystem from printing -+ * -+ * Releases the console_lock which the caller holds to block printing of -+ * the console subsystem. -+ * -+ * While the console_lock was held, console output may have been buffered -+ * by printk(). If this is the case, console_unlock(); emits -+ * the output prior to releasing the lock. 
-+ * -+ * console_unlock(); may be called from any context. -+ */ -+void console_unlock(void) -+{ -+ /* -+ * PREEMPT_RT relies on kthread and atomic consoles for printing. -+ * It never attempts to print from console_unlock(). -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ __console_unlock(); -+ return; -+ } -+ -+ console_flush_and_unlock(); -+} - EXPORT_SYMBOL(console_unlock); - /** -@@ -3197,6 +3318,7 @@ void console_flush_on_panic(enum con_flush_mode mode) + /* +diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h +index 478084f9105e..719416fe8ddc 100644 +--- a/include/linux/sched/idle.h ++++ b/include/linux/sched/idle.h +@@ -63,7 +63,7 @@ static __always_inline bool __must_check current_set_polling_and_test(void) + */ + smp_mb__after_atomic(); - if (mode == CONSOLE_REPLAY_ALL) { - struct console *c; -+ short flags; - int cookie; - u64 seq; +- return unlikely(tif_need_resched()); ++ return unlikely(need_resched()); + } -@@ -3204,16 +3326,25 @@ void console_flush_on_panic(enum con_flush_mode mode) + static __always_inline bool __must_check current_clr_polling_and_test(void) +@@ -76,7 +76,7 @@ static __always_inline bool __must_check current_clr_polling_and_test(void) + */ + smp_mb__after_atomic(); - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { -- /* -- * This is an unsynchronized assignment, but the -- * kernel is in "hope and pray" mode anyway. -- */ -- c->seq = seq; -+ flags = console_srcu_read_flags(c); -+ -+ if (flags & CON_NBCON) { -+ nbcon_seq_force(c, seq); -+ } else { -+ /* -+ * This is an unsynchronized assignment. On -+ * panic legacy consoles are only best effort. -+ */ -+ c->seq = seq; -+ } - } - console_srcu_read_unlock(cookie); - } +- return unlikely(tif_need_resched()); ++ return unlikely(need_resched()); + } -- console_flush_all(false, &next_seq, &handover); -+ nbcon_atomic_flush_all(); -+ -+ if (printing_via_unlock) -+ console_flush_all(false, &next_seq, &handover); + #else +@@ -85,11 +85,11 @@ static inline void __current_clr_polling(void) { } + + static inline bool __must_check current_set_polling_and_test(void) + { +- return unlikely(tif_need_resched()); ++ return unlikely(need_resched()); + } + static inline bool __must_check current_clr_polling_and_test(void) + { +- return unlikely(tif_need_resched()); ++ return unlikely(need_resched()); } + #endif - /* -@@ -3270,13 +3401,122 @@ EXPORT_SYMBOL(console_stop); +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 9ea0b28068f4..5ded1450ac1a 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -59,6 +59,16 @@ enum syscall_work_bit { - void console_start(struct console *console) - { -+ short flags; -+ - console_list_lock(); - console_srcu_write_flags(console, console->flags | CON_ENABLED); -+ flags = console->flags; - console_list_unlock(); -+ -+ /* -+ * Ensure that all SRCU list walks have completed. The related -+ * printing context must be able to see it is enabled so that -+ * it is guaranteed to wake up and resume printing. 
-+ */ -+ synchronize_srcu(&console_srcu); -+ -+ if (flags & CON_NBCON) -+ nbcon_kthread_wake(console); -+ else -+ wake_up_legacy_kthread(); + #include + ++#ifdef CONFIG_PREEMPT_BUILD_AUTO ++# define TIF_NEED_RESCHED_LAZY TIF_ARCH_RESCHED_LAZY ++# define _TIF_NEED_RESCHED_LAZY _TIF_ARCH_RESCHED_LAZY ++# define TIF_NEED_RESCHED_LAZY_OFFSET (TIF_NEED_RESCHED_LAZY - TIF_NEED_RESCHED) ++#else ++# define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED ++# define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED ++# define TIF_NEED_RESCHED_LAZY_OFFSET 0 ++#endif + - __pr_flush(console, 1000, true); + #ifdef __KERNEL__ + + #ifndef arch_set_restart_data +@@ -185,6 +195,13 @@ static __always_inline bool tif_need_resched(void) + (unsigned long *)(¤t_thread_info()->flags)); } - EXPORT_SYMBOL(console_start); -+#ifdef CONFIG_PRINTK -+static bool printer_should_wake(void) ++static __always_inline bool tif_need_resched_lazy(void) +{ -+ bool available = false; -+ struct console *con; -+ int cookie; -+ -+ if (kthread_should_stop()) -+ return true; -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ short flags = console_srcu_read_flags(con); -+ u64 printk_seq; -+ -+ /* -+ * The legacy printer thread is only for legacy consoles, -+ * unless the nbcon console has no kthread printer. -+ */ -+ if ((flags & CON_NBCON) && con->kthread) -+ continue; -+ -+ if (!console_is_usable(con, flags, true)) -+ continue; -+ -+ if (flags & CON_NBCON) { -+ printk_seq = nbcon_seq_read(con); -+ } else { -+ /* -+ * It is safe to read @seq because only this -+ * thread context updates @seq. -+ */ -+ printk_seq = con->seq; -+ } -+ -+ if (prb_read_valid(prb, printk_seq, NULL)) { -+ available = true; -+ break; -+ } -+ } -+ console_srcu_read_unlock(cookie); -+ -+ return available; ++ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && ++ arch_test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); +} + -+static int nbcon_legacy_kthread_func(void *unused) + #else + + static __always_inline bool tif_need_resched(void) +@@ -193,6 +210,13 @@ static __always_inline bool tif_need_resched(void) + (unsigned long *)(¤t_thread_info()->flags)); + } + ++static __always_inline bool tif_need_resched_lazy(void) +{ -+ int error; -+ -+ for (;;) { -+ error = wait_event_interruptible(legacy_wait, printer_should_wake()); -+ -+ if (kthread_should_stop()) -+ break; -+ -+ if (error) -+ continue; -+ -+ console_lock(); -+ console_flush_and_unlock(); -+ } -+ -+ return 0; ++ return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && ++ test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); +} + -+void nbcon_legacy_kthread_create(void) -+{ -+ struct task_struct *kt; -+ -+ lockdep_assert_held(&console_mutex); -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ return; -+ -+ if (!printk_threads_enabled || nbcon_legacy_kthread) -+ return; + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ + + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index fe95d13c5e4d..63b1a24f406a 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -178,8 +178,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); + + enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, +- TRACE_FLAG_IRQS_NOSUPPORT = 0x02, +- TRACE_FLAG_NEED_RESCHED = 0x04, ++ TRACE_FLAG_NEED_RESCHED = 0x02, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, +@@ -205,11 +205,11 @@ 
static inline unsigned int tracing_gen_ctx(void) + + static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) + { +- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++ return tracing_gen_ctx_irq_test(0); + } + static inline unsigned int tracing_gen_ctx(void) + { +- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++ return tracing_gen_ctx_irq_test(0); + } + #endif + +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a821..0f3d4c2a41cb 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -11,6 +11,13 @@ config PREEMPT_BUILD + select PREEMPTION + select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK + ++config PREEMPT_BUILD_AUTO ++ bool ++ select PREEMPT_BUILD + -+ kt = kthread_run(nbcon_legacy_kthread_func, NULL, "pr/legacy"); -+ if (IS_ERR(kt)) { -+ pr_err("unable to start legacy printing thread\n"); -+ return; -+ } ++config HAVE_PREEMPT_AUTO ++ bool + -+ nbcon_legacy_kthread = kt; + choice + prompt "Preemption Model" + default PREEMPT_NONE +@@ -67,9 +74,17 @@ config PREEMPT + embedded system with latency requirements in the milliseconds + range. + ++config PREEMPT_AUTO ++ bool "Automagic preemption mode with runtime tweaking support" ++ depends on HAVE_PREEMPT_AUTO ++ select PREEMPT_BUILD_AUTO ++ help ++ Add some sensible blurb here + -+ /* -+ * It is important that console printing threads are scheduled -+ * shortly after a printk call and with generous runtime budgets. -+ */ -+ sched_set_normal(nbcon_legacy_kthread, -20); -+} -+#endif /* CONFIG_PRINTK */ + config PREEMPT_RT + bool "Fully Preemptible Kernel (Real-Time)" + depends on EXPERT && ARCH_SUPPORTS_RT ++ select PREEMPT_BUILD_AUTO if HAVE_PREEMPT_AUTO + select PREEMPTION + help + This option turns the kernel into a real-time kernel by replacing +@@ -95,7 +110,7 @@ config PREEMPTION + + config PREEMPT_DYNAMIC + bool "Preemption behaviour defined on boot" +- depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT ++ depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO + select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY + select PREEMPT_BUILD + default y if HAVE_PREEMPT_DYNAMIC_CALL +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index 5ff4f1cd3644..fd42f0b17dec 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -161,7 +161,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + + local_irq_enable_exit_to_user(ti_work); + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) + schedule(); + + if (ti_work & _TIF_UPROBE) +@@ -391,7 +391,7 @@ void raw_irqentry_exit_cond_resched(void) + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); +- if (need_resched()) ++ if (test_tsk_need_resched(current)) + preempt_schedule_irq(); + } + } +diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c +index 2e0f75bcb7fd..d952fa5ee880 100644 +--- a/kernel/entry/kvm.c ++++ b/kernel/entry/kvm.c +@@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) + return -EINTR; + } + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & (_TIF_NEED_RESCHED | TIF_NEED_RESCHED_LAZY)) + schedule(); + + if (ti_work & _TIF_NOTIFY_RESUME) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f2e784d966b2..4c59b8f40017 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -899,14 +899,15 @@ static inline void hrtick_rq_init(struct rq *rq) + + #if defined(CONFIG_SMP) && 
defined(TIF_POLLING_NRFLAG) + /* +- * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, ++ * Atomically set TIF_NEED_RESCHED[_LAZY] and test for TIF_POLLING_NRFLAG, + * this avoids any races wrt polling state changes and thereby avoids + * spurious IPIs. + */ +-static inline bool set_nr_and_not_polling(struct task_struct *p) ++static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit) + { + struct thread_info *ti = task_thread_info(p); +- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); + - static int __read_mostly keep_bootcon; ++ return !(fetch_or(&ti->flags, 1 << tif_bit) & _TIF_POLLING_NRFLAG); + } - static int __init keep_bootcon_setup(char *str) -@@ -3375,11 +3615,6 @@ static void try_enable_default_console(struct console *newcon) - newcon->flags |= CON_CONSDEV; + /* +@@ -923,7 +924,7 @@ static bool set_nr_if_polling(struct task_struct *p) + for (;;) { + if (!(val & _TIF_POLLING_NRFLAG)) + return false; +- if (val & _TIF_NEED_RESCHED) ++ if (val & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) + return true; + if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED)) + break; +@@ -932,9 +933,9 @@ static bool set_nr_if_polling(struct task_struct *p) } --#define con_printk(lvl, con, fmt, ...) \ -- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ -- (con->flags & CON_BOOT) ? "boot" : "", \ -- con->name, con->index, ##__VA_ARGS__) -- - static void console_init_seq(struct console *newcon, bool bootcon_registered) + #else +-static inline bool set_nr_and_not_polling(struct task_struct *p) ++static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit) { - struct console *con; -@@ -3428,11 +3663,20 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) +- set_tsk_need_resched(p); ++ set_tsk_thread_flag(p, tif_bit); + return true; + } - newcon->seq = prb_next_seq(prb); - for_each_console(con) { -- if ((con->flags & CON_BOOT) && -- (con->flags & CON_ENABLED) && -- con->seq < newcon->seq) { -- newcon->seq = con->seq; -+ u64 seq; -+ -+ if (!((con->flags & CON_BOOT) && -+ (con->flags & CON_ENABLED))) { -+ continue; - } -+ -+ if (con->flags & CON_NBCON) -+ seq = nbcon_seq_read(con); -+ else -+ seq = con->seq; -+ -+ if (seq < newcon->seq) -+ newcon->seq = seq; - } - } +@@ -1040,28 +1041,47 @@ void wake_up_q(struct wake_q_head *head) + * might also involve a cross-CPU call to trigger the scheduler on + * the target CPU. + */ +-void resched_curr(struct rq *rq) ++static void __resched_curr(struct rq *rq, int lazy) + { ++ int cpu, tif_bit = TIF_NEED_RESCHED + lazy; + struct task_struct *curr = rq->curr; +- int cpu; -@@ -3493,6 +3737,15 @@ void register_console(struct console *newcon) - goto unlock; - } + lockdep_assert_rq_held(rq); -+ if (newcon->flags & CON_NBCON) { -+ /* -+ * Ensure the nbcon console buffers can be allocated -+ * before modifying any global data. -+ */ -+ if (!nbcon_alloc(newcon)) -+ goto unlock; -+ } -+ - /* - * See if we want to enable this console driver by default. - * -@@ -3520,8 +3773,11 @@ void register_console(struct console *newcon) - err = try_enable_preferred_console(newcon, false); +- if (test_tsk_need_resched(curr)) ++ if (unlikely(test_tsk_thread_flag(curr, tif_bit))) + return; - /* printk() messages are not printed to the Braille console. 
*/ -- if (err || newcon->flags & CON_BRL) -+ if (err || newcon->flags & CON_BRL) { -+ if (newcon->flags & CON_NBCON) -+ nbcon_free(newcon); - goto unlock; -+ } + cpu = cpu_of(rq); - /* - * If we have a bootconsole, and are switching to a real console, -@@ -3537,6 +3793,17 @@ void register_console(struct console *newcon) - newcon->dropped = 0; - console_init_seq(newcon, bootcon_registered); + if (cpu == smp_processor_id()) { +- set_tsk_need_resched(curr); +- set_preempt_need_resched(); ++ set_tsk_thread_flag(curr, tif_bit); ++ if (!lazy) ++ set_preempt_need_resched(); + return; + } -+ if (newcon->flags & CON_NBCON) { -+ have_nbcon_console = true; -+ nbcon_init(newcon); +- if (set_nr_and_not_polling(curr)) +- smp_send_reschedule(cpu); +- else ++ if (set_nr_and_not_polling(curr, tif_bit)) { ++ if (!lazy) ++ smp_send_reschedule(cpu); + } else { -+ have_legacy_console = true; -+ nbcon_legacy_kthread_create(); + trace_sched_wake_idle_without_ipi(cpu); + } ++} + -+ if (newcon->flags & CON_BOOT) -+ have_boot_console = true; -+ - /* - * Put this console in the list - keep the - * preferred driver at the head of the list. -@@ -3589,6 +3856,11 @@ EXPORT_SYMBOL(register_console); - /* Must be called under console_list_lock(). */ - static int unregister_console_locked(struct console *console) - { -+ bool is_boot_con = (console->flags & CON_BOOT); -+ bool found_legacy_con = false; -+ bool found_nbcon_con = false; -+ bool found_boot_con = false; -+ struct console *c; - int res; - - lockdep_assert_console_list_lock_held(); -@@ -3628,11 +3900,50 @@ static int unregister_console_locked(struct console *console) - */ - synchronize_srcu(&console_srcu); - -+ if (console->flags & CON_NBCON) -+ nbcon_free(console); -+ - console_sysfs_notify(); - - if (console->exit) - res = console->exit(console); - -+ /* -+ * With this console gone, the global flags tracking registered -+ * console types may have changed. Update them. -+ */ -+ for_each_console(c) { -+ if (c->flags & CON_BOOT) -+ found_boot_con = true; -+ -+ if (c->flags & CON_NBCON) -+ found_nbcon_con = true; -+ else -+ found_legacy_con = true; -+ } -+ if (!found_boot_con) -+ have_boot_console = false; -+ if (!found_legacy_con) -+ have_legacy_console = false; -+ if (!found_nbcon_con) -+ have_nbcon_console = false; ++void resched_curr(struct rq *rq) ++{ ++ __resched_curr(rq, 0); ++} + -+ /* -+ * When the last boot console unregisters, start up the -+ * printing threads. -+ */ -+ if (is_boot_con && !have_boot_console) { -+ for_each_console(c) -+ nbcon_kthread_create(c); -+ } ++void resched_curr_lazy(struct rq *rq) ++{ ++ int lazy = IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && !sched_feat(FORCE_NEED_RESCHED) ? ++ TIF_NEED_RESCHED_LAZY_OFFSET : 0; + -+#ifdef CONFIG_PRINTK -+ if (!printing_via_unlock && nbcon_legacy_kthread) { -+ kthread_stop(nbcon_legacy_kthread); -+ nbcon_legacy_kthread = NULL; -+ } -+#endif ++ if (lazy && unlikely(test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED))) ++ return; + - return res; ++ __resched_curr(rq, lazy); } -@@ -3777,69 +4088,94 @@ late_initcall(printk_late_init); - /* If @con is specified, only wait for that console. Otherwise wait for all. 
*/ - static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) - { -- int remaining = timeout_ms; -+ unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); -+ unsigned long remaining_jiffies = timeout_jiffies; - struct console *c; - u64 last_diff = 0; - u64 printk_seq; -+ short flags; -+ bool locked; - int cookie; - u64 diff; - u64 seq; - - might_sleep(); - -- seq = prb_next_seq(prb); -+ seq = prb_next_reserve_seq(prb); + void resched_cpu(int cpu) +@@ -1134,7 +1154,7 @@ static void wake_up_idle_cpu(int cpu) + if (cpu == smp_processor_id()) + return; -- /* Flush the consoles so that records up to @seq are printed. */ -- console_lock(); -- console_unlock(); -+ /* -+ * Flush the consoles so that records up to @seq are printed. -+ * Otherwise this function will just wait for the threaded printers -+ * to print up to @seq. -+ */ -+ if (printing_via_unlock && !IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ console_lock(); -+ console_unlock(); -+ } +- if (set_nr_and_not_polling(rq->idle)) ++ if (set_nr_and_not_polling(rq->idle, TIF_NEED_RESCHED)) + smp_send_reschedule(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index 115e266db76b..90028c5ba31a 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -333,6 +333,23 @@ static const struct file_operations sched_debug_fops = { + .release = seq_release, + }; - for (;;) { -+ unsigned long begin_jiffies; -+ unsigned long slept_jiffies; ++static ssize_t sched_hog_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ unsigned long end = jiffies + 60 * HZ; ++ ++ for (; time_before(jiffies, end) && !signal_pending(current);) ++ cpu_relax(); ++ ++ return cnt; ++} + -+ locked = false; - diff = 0; ++static const struct file_operations sched_hog_fops = { ++ .write = sched_hog_write, ++ .open = simple_open, ++ .llseek = default_llseek, ++}; ++ + static struct dentry *debugfs_sched; -- /* -- * Hold the console_lock to guarantee safe access to -- * console->seq. Releasing console_lock flushes more -- * records in case @seq is still not printed on all -- * usable consoles. -- */ -- console_lock(); -+ if (printing_via_unlock) { -+ /* -+ * Hold the console_lock to guarantee safe access to -+ * console->seq. Releasing console_lock flushes more -+ * records in case @seq is still not printed on all -+ * usable consoles. -+ */ -+ console_lock(); -+ locked = true; -+ } + static __init int sched_init_debug(void) +@@ -374,6 +391,8 @@ static __init int sched_init_debug(void) - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { - if (con && con != c) - continue; -+ -+ flags = console_srcu_read_flags(c); + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); + ++ debugfs_create_file("hog", 0200, debugfs_sched, NULL, &sched_hog_fops); + - /* - * If consoles are not usable, it cannot be expected - * that they make forward progress, so only increment - * @diff for usable consoles. 
- */ -- if (!console_is_usable(c)) -+ if (!console_is_usable(c, flags, true) && -+ !console_is_usable(c, flags, false)) { - continue; -- printk_seq = c->seq; -+ } + return 0; + } + late_initcall(sched_init_debug); +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 1cf43e91ae9d..0fba05a251ca 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1022,8 +1022,10 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); + * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i + * this is probably good enough. + */ +-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) ++static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se, bool tick) + { ++ struct rq *rq = rq_of(cfs_rq); + -+ if (flags & CON_NBCON) { -+ printk_seq = nbcon_seq_read(c); -+ } else { -+ WARN_ON_ONCE(!locked); -+ printk_seq = c->seq; -+ } + if ((s64)(se->vruntime - se->deadline) < 0) + return; + +@@ -1042,10 +1044,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) + /* + * The task has consumed its request, reschedule. + */ +- if (cfs_rq->nr_running > 1) { +- resched_curr(rq_of(cfs_rq)); +- clear_buddies(cfs_rq, se); ++ if (cfs_rq->nr_running < 2) ++ return; + - if (printk_seq < seq) - diff += seq - printk_seq; - } - console_srcu_read_unlock(cookie); ++ if (!IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) || sched_feat(FORCE_NEED_RESCHED)) { ++ resched_curr(rq); ++ } else { ++ /* Did the task ignore the lazy reschedule request? */ ++ if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY)) ++ resched_curr(rq); ++ else ++ resched_curr_lazy(rq); + } ++ clear_buddies(cfs_rq, se); + } - if (diff != last_diff && reset_on_progress) -- remaining = timeout_ms; -+ remaining_jiffies = timeout_jiffies; + #include "pelt.h" +@@ -1198,7 +1209,7 @@ s64 update_curr_common(struct rq *rq) + /* + * Update the current task's runtime statistics. + */ +-static void update_curr(struct cfs_rq *cfs_rq) ++static void __update_curr(struct cfs_rq *cfs_rq, bool tick) + { + struct sched_entity *curr = cfs_rq->curr; + s64 delta_exec; +@@ -1211,7 +1222,7 @@ static void update_curr(struct cfs_rq *cfs_rq) + return; -- console_unlock(); -+ if (locked) -+ console_unlock(); + curr->vruntime += calc_delta_fair(delta_exec, curr); +- update_deadline(cfs_rq, curr); ++ update_deadline(cfs_rq, curr, tick); + update_min_vruntime(cfs_rq); - /* Note: @diff is 0 if there are no usable consoles. */ -- if (diff == 0 || remaining == 0) -+ if (diff == 0 || remaining_jiffies == 0) - break; + if (entity_is_task(curr)) +@@ -1220,6 +1231,11 @@ static void update_curr(struct cfs_rq *cfs_rq) + account_cfs_rq_runtime(cfs_rq, delta_exec); + } -- if (remaining < 0) { -- /* no timeout limit */ -- msleep(100); -- } else if (remaining < 100) { -- msleep(remaining); -- remaining = 0; -- } else { -- msleep(100); -- remaining -= 100; -- } -+ /* msleep(1) might sleep much longer. Check time by jiffies. */ -+ begin_jiffies = jiffies; -+ msleep(1); -+ slept_jiffies = jiffies - begin_jiffies; ++static inline void update_curr(struct cfs_rq *cfs_rq) ++{ ++ __update_curr(cfs_rq, false); ++} + -+ remaining_jiffies -= min(slept_jiffies, remaining_jiffies); + static void update_curr_fair(struct rq *rq) + { + update_curr(cfs_rq_of(&rq->curr->se)); +@@ -5515,7 +5531,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) + /* + * Update run-time statistics of the 'current'. 
+ */ +- update_curr(cfs_rq); ++ __update_curr(cfs_rq, true); - last_diff = diff; + /* + * Ensure that runnable average is periodically updated. +@@ -5529,7 +5545,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) + * validating it and just reschedule. + */ + if (queued) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + return; } -@@ -3880,9 +4216,16 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) - int pending = this_cpu_xchg(printk_pending, 0); + /* +@@ -5675,7 +5691,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } - if (pending & PRINTK_PENDING_OUTPUT) { -- /* If trylock fails, someone else is doing the printing */ -- if (console_trylock()) -- console_unlock(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ wake_up_interruptible(&legacy_wait); -+ } else { -+ /* -+ * If trylock fails, some other context -+ * will do the printing. -+ */ -+ if (console_trylock()) -+ console_unlock(); -+ } - } + static __always_inline +@@ -5935,7 +5951,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) - if (pending & PRINTK_PENDING_WAKEUP) -@@ -3950,11 +4293,16 @@ void defer_console_output(void) - * New messages may have been added directly to the ringbuffer - * using vprintk_store(), so wake any waiters as well. + /* Determine whether we need to wake up potentially idle CPU: */ + if (rq->curr == rq->idle && rq->cfs.nr_running) +- resched_curr(rq); ++ resched_curr_lazy(rq); + } + + #ifdef CONFIG_SMP +@@ -6640,7 +6656,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) + + if (delta < 0) { + if (task_current(rq, p)) +- resched_curr(rq); ++ resched_curr_lazy(rq); + return; + } + hrtick_start(rq, delta); +@@ -8334,7 +8350,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int + * prevents us from potentially nominating it as a false LAST_BUDDY + * below. 
*/ -- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); -+ int val = PRINTK_PENDING_WAKEUP; -+ -+ if (printing_via_unlock) -+ val |= PRINTK_PENDING_OUTPUT; -+ __wake_up_klogd(val); +- if (test_tsk_need_resched(curr)) ++ if (need_resched()) + return; + + if (!sched_feat(WAKEUP_PREEMPTION)) +@@ -8372,7 +8388,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int + return; + + preempt: +- resched_curr(rq); ++ resched_curr_lazy(rq); } - void printk_trigger_flush(void) - { -+ nbcon_wake_threads(); - defer_console_output(); + #ifdef CONFIG_SMP +@@ -12538,7 +12554,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr) + */ + if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 && + __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) +- resched_curr(rq); ++ resched_curr_lazy(rq); } -diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c -index fde338606..e7b808b82 100644 ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -6,6 +6,7 @@ - #include - #include - #include "printk_ringbuffer.h" -+#include "internal.h" + /* +@@ -12703,7 +12719,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) + */ + if (task_current(rq, p)) { + if (p->prio > oldprio) +- resched_curr(rq); ++ resched_curr_lazy(rq); + } else + wakeup_preempt(rq, p, 0); + } +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index f770168230ae..dd8b35f67fed 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -89,3 +89,5 @@ SCHED_FEAT(UTIL_EST_FASTUP, true) + SCHED_FEAT(LATENCY_WARN, false) - /** - * DOC: printk_ringbuffer overview -@@ -303,6 +304,9 @@ - * - * desc_push_tail:B / desc_reserve:D - * set descriptor reusable (state), then push descriptor tail (id) -+ * -+ * desc_update_last_finalized:A / desc_last_finalized_seq:A -+ * store finalized record, then set new highest finalized sequence number - */ + SCHED_FEAT(HZ_BW, true) ++ ++SCHED_FEAT(FORCE_NEED_RESCHED, false) +diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c +index 565f8374ddbb..22d70000ab59 100644 +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -57,8 +57,7 @@ static noinline int __cpuidle cpu_idle_poll(void) + ct_cpuidle_enter(); - #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) -@@ -1030,9 +1034,13 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, - unsigned long next_lpos; + raw_local_irq_enable(); +- while (!tif_need_resched() && +- (cpu_idle_force_poll || tick_check_broadcast_expired())) ++ while (!need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired())) + cpu_relax(); + raw_local_irq_disable(); - if (size == 0) { -- /* Specify a data-less block. */ -- blk_lpos->begin = NO_LPOS; -- blk_lpos->next = NO_LPOS; -+ /* -+ * Data blocks are not created for empty lines. Instead, the -+ * reader will recognize these special lpos values and handle -+ * it appropriately. 
-+ */ -+ blk_lpos->begin = EMPTY_LINE_LPOS; -+ blk_lpos->next = EMPTY_LINE_LPOS; - return NULL; - } +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index f7cb505ab337..4a4977754e36 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2443,6 +2443,7 @@ extern void init_sched_fair_class(void); + extern void reweight_task(struct task_struct *p, const struct load_weight *lw); -@@ -1210,10 +1218,18 @@ static const char *get_data(struct prb_data_ring *data_ring, + extern void resched_curr(struct rq *rq); ++extern void resched_curr_lazy(struct rq *rq); + extern void resched_cpu(int cpu); - /* Data-less data block description. */ - if (BLK_DATALESS(blk_lpos)) { -- if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { -+ /* -+ * Records that are just empty lines are also valid, even -+ * though they do not have a data block. For such records -+ * explicitly return empty string data to signify success. -+ */ -+ if (blk_lpos->begin == EMPTY_LINE_LPOS && -+ blk_lpos->next == EMPTY_LINE_LPOS) { - *data_size = 0; - return ""; - } -+ -+ /* Data lost, invalid, or otherwise unavailable. */ - return NULL; - } + extern struct rt_bandwidth def_rt_bandwidth; +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index a32c8637503d..6bdc2f8a33cd 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2705,6 +2705,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) -@@ -1441,20 +1457,118 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer - return false; - } + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; ++ if (tif_need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index 448ee37ae245..6816eca297d7 100644 +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -464,17 +464,29 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + bh_off ? 'b' : +- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : ++ !IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' : + '.'; -+/* -+ * @last_finalized_seq value guarantees that all records up to and including -+ * this sequence number are finalized and can be read. The only exception are -+ * too old records which have already been overwritten. -+ * -+ * It is also guaranteed that @last_finalized_seq only increases. -+ * -+ * Be aware that finalized records following non-finalized records are not -+ * reported because they are not yet available to the reader. For example, -+ * a new record stored via printk() will not be available to a printer if -+ * it follows a record that has not been finalized yet. However, once that -+ * non-finalized record becomes finalized, @last_finalized_seq will be -+ * appropriately updated and the full set of finalized records will be -+ * available to the printer. And since each printk() caller will either -+ * directly print or trigger deferred printing of all available unprinted -+ * records, all printk() messages will get printed. 
-+ */ -+static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long ulseq; -+ -+ /* -+ * Guarantee the sequence number is loaded before loading the -+ * associated record in order to guarantee that the record can be -+ * seen by this CPU. This pairs with desc_update_last_finalized:A. -+ */ -+ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq -+ ); /* LMM(desc_last_finalized_seq:A) */ -+ -+ return __ulseq_to_u64seq(rb, ulseq); -+} -+ -+static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, -+ struct printk_record *r, unsigned int *line_count); -+ -+/* -+ * Check if there are records directly following @last_finalized_seq that are -+ * finalized. If so, update @last_finalized_seq to the latest of these -+ * records. It is not allowed to skip over records that are not yet finalized. -+ */ -+static void desc_update_last_finalized(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ u64 old_seq = desc_last_finalized_seq(rb); -+ unsigned long oldval; -+ unsigned long newval; -+ u64 finalized_seq; -+ u64 try_seq; -+ -+try_again: -+ finalized_seq = old_seq; -+ try_seq = finalized_seq + 1; -+ -+ /* Try to find later finalized records. */ -+ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { -+ finalized_seq = try_seq; -+ try_seq++; -+ } -+ -+ /* No update needed if no later finalized record was found. */ -+ if (finalized_seq == old_seq) -+ return; -+ -+ oldval = __u64seq_to_ulseq(old_seq); -+ newval = __u64seq_to_ulseq(finalized_seq); -+ -+ /* -+ * Set the sequence number of a later finalized record that has been -+ * seen. -+ * -+ * Guarantee the record data is visible to other CPUs before storing -+ * its sequence number. This pairs with desc_last_finalized_seq:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_last_finalized_seq:A reads from -+ * desc_update_last_finalized:A, then desc_read:A reads from -+ * _prb_commit:B. -+ * -+ * Relies on: -+ * -+ * RELEASE from _prb_commit:B to desc_update_last_finalized:A -+ * matching -+ * ACQUIRE from desc_last_finalized_seq:A to desc_read:A -+ * -+ * Note: _prb_commit:B and desc_update_last_finalized:A can be -+ * different CPUs. However, the desc_update_last_finalized:A -+ * CPU (which performs the release) must have previously seen -+ * _prb_commit:B. 
-+ */ -+ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, -+ &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ -+ old_seq = __ulseq_to_u64seq(rb, oldval); -+ goto try_again; -+ } -+} +- switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | ++ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | + TRACE_FLAG_PREEMPT_RESCHED)) { ++ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: ++ need_resched = 'B'; ++ break; + case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'N'; + break; ++ case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: ++ need_resched = 'L'; ++ break; ++ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY: ++ need_resched = 'b'; ++ break; + case TRACE_FLAG_NEED_RESCHED: + need_resched = 'n'; + break; ++ case TRACE_FLAG_NEED_RESCHED_LAZY: ++ need_resched = 'l'; ++ break; + case TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'p'; + break; +-- +2.51.0 + +From 4f5ad10a5d389688460077a245a339d849f125ff Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 8 Jul 2015 17:14:48 +0200 +Subject: [PATCH 168/213] arm: Disable jump-label on PREEMPT_RT. + +jump-labels are used to efficiently switch between two possible code +paths. To achieve this, stop_machine() is used to keep the CPU in a +known state while the opcode is modified. The usage of stop_machine() +here leads to large latency spikes which can be observed on PREEMPT_RT. + +Jump labels may change the target during runtime and are not restricted +to debug or "configuration/ setup" part of a PREEMPT_RT system where +high latencies could be defined as acceptable. + +Disable jump-label support on a PREEMPT_RT system. + +[bigeasy: Patch description.] + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lkml.kernel.org/r/20220613182447.112191-2-bigeasy@linutronix.de +--- + arch/arm/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 57c0448d017a..dea47ba6576b 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -73,7 +73,7 @@ config ARM + select HAS_IOPORT + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 +- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU ++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL +-- +2.51.0 + +From 2ae5592012f9b65497a2ff7b8128e0135231f3ff Mon Sep 17 00:00:00 2001 +From: "Yadi.hu" +Date: Wed, 10 Dec 2014 10:32:09 +0800 +Subject: [PATCH 169/213] ARM: enable irq in translation/section permission + fault handlers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Probably happens on all ARM, with +CONFIG_PREEMPT_RT +CONFIG_DEBUG_ATOMIC_SLEEP + +This simple program.... + +int main() { + *((char*)0xc0001000) = 0; +}; + +[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658 +[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a +[ 512.743217] INFO: lockdep is turned off. 
+[ 512.743360] irq event stamp: 0 +[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null) +[ 512.743714] hardirqs last disabled at (0): [] copy_process+0x3b0/0x11c0 +[ 512.744013] softirqs last enabled at (0): [] copy_process+0x3b0/0x11c0 +[ 512.744303] softirqs last disabled at (0): [< (null)>] (null) +[ 512.744631] [] (unwind_backtrace+0x0/0x104) +[ 512.745001] [] (dump_stack+0x20/0x24) +[ 512.745355] [] (__might_sleep+0x1dc/0x1e0) +[ 512.745717] [] (rt_spin_lock+0x34/0x6c) +[ 512.746073] [] (do_force_sig_info+0x34/0xf0) +[ 512.746457] [] (force_sig_info+0x18/0x1c) +[ 512.746829] [] (__do_user_fault+0x9c/0xd8) +[ 512.747185] [] (do_bad_area+0x7c/0x94) +[ 512.747536] [] (do_sect_fault+0x40/0x48) +[ 512.747898] [] (do_DataAbort+0x40/0xa0) +[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8) + +Oxc0000000 belongs to kernel address space, user task can not be +allowed to access it. For above condition, correct result is that +test case should receive a “segment fault” and exits but not stacks. + +the root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in +prefetch/data abort handlers"),it deletes irq enable block in Data +abort assemble code and move them into page/breakpiont/alignment fault +handlers instead. But author does not enable irq in translation/section +permission fault handlers. ARM disables irq when it enters exception/ +interrupt mode, if kernel doesn't enable irq, it would be still disabled +during translation/section permission fault. + +We see the above splat because do_force_sig_info is still called with +IRQs off, and that code eventually does a: + + spin_lock_irqsave(&t->sighand->siglock, flags); + +As this is architecture independent code, and we've not seen any other +need for other arch to have the siglock converted to raw lock, we can +conclude that we should enable irq for ARM translation/section +permission exception. + + +Signed-off-by: Yadi.hu +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/arm/mm/fault.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index ed1a25f457e4..90c9ecd71271 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -411,6 +411,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); + - /* - * Attempt to finalize a specified descriptor. If this fails, the descriptor - * is either already final or it will finalize itself when the writer commits. - */ --static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) -+static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) - { -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; - unsigned long prev_state_val = DESC_SV(id, desc_committed); - struct prb_desc *d = to_desc(desc_ring, id); + if (user_mode(regs)) + goto bad_area; -- atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, -- DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ -- -- /* Best effort to remember the last finalized @id. 
*/ -- atomic_long_set(&desc_ring->last_finalized_id, id); -+ if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, -+ DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ -+ desc_update_last_finalized(rb); -+ } +@@ -481,6 +484,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + static int + do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + { ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + do_bad_area(addr, fsr, regs); + return 0; } +-- +2.51.0 + +From 933a86035680953e7caf11aef13e6512602be2f8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 28 Jul 2011 13:32:57 +0200 +Subject: [PATCH 170/213] tty/serial/omap: Make the locking RT aware + +The lock is a sleeping lock and local_irq_save() is not the +optimsation we are looking for. Redo it to make it work on -RT and +non-RT. + +Signed-off-by: Thomas Gleixner +--- + drivers/tty/serial/omap-serial.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c +index f4c6ff806465..1097fca22307 100644 +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1212,13 +1212,10 @@ serial_omap_console_write(struct console *co, const char *s, + unsigned int ier; + int locked = 1; - /** -@@ -1550,7 +1664,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - * readers. (For seq==0 there is no previous descriptor.) - */ - if (info->seq > 0) -- desc_make_final(desc_ring, DESC_ID(id - 1)); -+ desc_make_final(rb, DESC_ID(id - 1)); +- local_irq_save(flags); +- if (up->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = uart_port_trylock(&up->port); ++ if (up->port.sysrq || oops_in_progress) ++ locked = uart_port_trylock_irqsave(&up->port, &flags); + else +- uart_port_lock(&up->port); ++ uart_port_lock_irqsave(&up->port, &flags); - r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); - /* If text data allocation fails, a data-less record is committed. */ -@@ -1643,7 +1757,7 @@ void prb_commit(struct prb_reserved_entry *e) - */ - head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ - if (head_id != e->id) -- desc_make_final(desc_ring, e->id); -+ desc_make_final(e->rb, e->id); + /* + * First save the IER then disable the interrupts +@@ -1245,8 +1242,7 @@ serial_omap_console_write(struct console *co, const char *s, + check_modem_status(up); + + if (locked) +- uart_port_unlock(&up->port); +- local_irq_restore(flags); ++ uart_port_unlock_irqrestore(&up->port, flags); } - /** -@@ -1663,12 +1777,9 @@ void prb_commit(struct prb_reserved_entry *e) - */ - void prb_final_commit(struct prb_reserved_entry *e) - { -- struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -- - _prb_commit(e, desc_finalized); + static int __init +-- +2.51.0 + +From 1ebfa41bc474ecee2388341342faeae7aa7de67c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 8 Jan 2013 21:36:51 +0100 +Subject: [PATCH 171/213] tty/serial/pl011: Make the locking work on RT + +The lock is a sleeping lock and local_irq_save() is not the optimsation +we are looking for. Redo it to make it work on -RT and non-RT. 
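Both this change and the omap-serial change above are the same conversion: instead of wrapping a local_irq_save() around the port lock (a sleeping lock on PREEMPT_RT that must not be taken with interrupts hard-disabled), the IRQ-state handling is folded into the serial core lock wrappers. A minimal sketch of the resulting console-write locking pattern, assuming the serial core helpers used in these hunks; the function name and the character-output placeholder are illustrative, not taken from either driver:

    #include <linux/serial_core.h>

    static void example_console_write(struct uart_port *port)
    {
            unsigned long flags;
            int locked = 1;

            /*
             * In sysrq/oops context the port lock may already be held
             * by the interrupted context, so only try to acquire it;
             * otherwise take the lock and save the IRQ state in one
             * operation.
             */
            if (port->sysrq || oops_in_progress)
                    locked = uart_port_trylock_irqsave(port, &flags);
            else
                    uart_port_lock_irqsave(port, &flags);

            /* ... write the characters out to the hardware ... */

            if (locked)
                    uart_port_unlock_irqrestore(port, flags);
    }

On a non-RT kernel the wrappers still disable interrupts around the spinlock, so behaviour should be unchanged; on PREEMPT_RT the underlying lock is a sleeping lock and the saved flags are carried through without ever hard-disabling interrupts.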
+ +Signed-off-by: Thomas Gleixner +--- + drivers/tty/serial/amba-pl011.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index bb31ac9cae8c..8b7cf07e4491 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -2335,13 +2335,10 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) -- /* Best effort to remember the last finalized @id. */ -- atomic_long_set(&desc_ring->last_finalized_id, e->id); -+ desc_update_last_finalized(e->rb); - } + clk_enable(uap->clk); - /* -@@ -1746,6 +1857,8 @@ static bool copy_data(struct prb_data_ring *data_ring, - * descriptor. However, it also verifies that the record is finalized and has - * the sequence number @seq. On success, 0 is returned. - * -+ * For the panic CPU, committed descriptors are also considered finalized. -+ * - * Error return values: - * -EINVAL: A finalized record with sequence number @seq does not exist. - * -ENOENT: A finalized record with sequence number @seq exists, but its data -@@ -1764,16 +1877,25 @@ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, +- local_irq_save(flags); +- if (uap->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = uart_port_trylock(&uap->port); ++ if (uap->port.sysrq || oops_in_progress) ++ locked = uart_port_trylock_irqsave(&uap->port, &flags); + else +- uart_port_lock(&uap->port); ++ uart_port_lock_irqsave(&uap->port, &flags); /* - * An unexpected @id (desc_miss) or @seq mismatch means the record -- * does not exist. A descriptor in the reserved or committed state -- * means the record does not yet exist for the reader. -+ * does not exist. A descriptor in the reserved state means the -+ * record does not yet exist for the reader. - */ - if (d_state == desc_miss || - d_state == desc_reserved || -- d_state == desc_committed || - s != seq) { - return -EINVAL; - } + * First save the CR then disable the interrupts +@@ -2367,8 +2364,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + pl011_write(old_cr, uap, REG_CR); -+ /* -+ * A descriptor in the committed state means the record does not yet -+ * exist for the reader. However, for the panic CPU, committed -+ * records are also handled as finalized records since they contain -+ * message data in a consistent state and may contain additional -+ * hints as to the cause of the panic. -+ */ -+ if (d_state == desc_committed && !this_cpu_in_panic()) -+ return -EINVAL; -+ - /* - * A descriptor in the reusable state may no longer have its data - * available; report it as existing but with lost data. Or the record -@@ -1832,7 +1954,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq, - } + if (locked) +- uart_port_unlock(&uap->port); +- local_irq_restore(flags); ++ uart_port_unlock_irqrestore(&uap->port, flags); - /* Get the sequence number of the tail descriptor. */ --static u64 prb_first_seq(struct printk_ringbuffer *rb) -+u64 prb_first_seq(struct printk_ringbuffer *rb) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; - enum desc_state d_state; -@@ -1875,12 +1997,131 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb) - return seq; + clk_disable(uap->clk); } +-- +2.51.0 + +From 920ff39d87d49364f8d6de08cc50a9307ae737df Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 19 May 2023 16:57:29 +0200 +Subject: [PATCH 172/213] ARM: vfp: Provide vfp_lock() for VFP locking. 
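In short, the primitive introduced here gives its caller exclusive, migration-safe ownership of the VFP unit using whichever mechanism is valid for the active preemption model. A condensed sketch of the pair together with a hypothetical caller (vfp_lock()/vfp_unlock() mirror the hunk below; touch_vfp_state() is invented for illustration):

    static void vfp_lock(void)
    {
            if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                    local_bh_disable();     /* !RT: BH-off also disables preemption */
            else
                    preempt_disable();      /* RT: BH-off stays preemptible */
    }

    static void vfp_unlock(void)
    {
            if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                    local_bh_enable();
            else
                    preempt_enable();
    }

    /*
     * Any VFP/NEON register manipulation is bracketed by the pair,
     * mirroring what kernel_neon_begin()/kernel_neon_end() do below.
     */
    static void touch_vfp_state(void)
    {
            vfp_lock();
            /* ... read or modify VFP/NEON registers ... */
            vfp_unlock();
    }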
+ +kernel_neon_begin() uses local_bh_disable() to ensure exclusive access +to the VFP unit. This is broken on PREEMPT_RT because a BH disabled +section remains preemptible on PREEMPT_RT. + +Introduce vfp_lock() which uses local_bh_disable() and preempt_disable() +on PREEMPT_RT. Since softirqs are processed always in thread context, +disabling preemption is enough to ensure that the current context won't +get interrupted by something that is using the VFP. Use it in +kernel_neon_begin(). + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/vfp/vfpmodule.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index 7e8773a2d99d..8d321cdb7ac5 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -55,6 +55,34 @@ extern unsigned int VFP_arch_feroceon __alias(VFP_arch); + */ + union vfp_state *vfp_current_hw_state[NR_CPUS]; -+/** -+ * prb_next_reserve_seq() - Get the sequence number after the most recently -+ * reserved record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what sequence -+ * number will be assigned to the next reserved record. ++/* ++ * Claim ownership of the VFP unit. + * -+ * Note that depending on the situation, this value can be equal to or -+ * higher than the sequence number returned by prb_next_seq(). ++ * The caller may change VFP registers until vfp_unlock() is called. + * -+ * Context: Any context. -+ * Return: The sequence number that will be assigned to the next record -+ * reserved. ++ * local_bh_disable() is used to disable preemption and to disable VFP ++ * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is ++ * not sufficient because it only serializes soft interrupt related sections ++ * via a local lock, but stays preemptible. Disabling preemption is the right ++ * choice here as bottom half processing is always in thread context on RT ++ * kernels so it implicitly prevents bottom half processing as well. + */ -+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) ++static void vfp_lock(void) +{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long last_finalized_id; -+ atomic_long_t *state_var; -+ u64 last_finalized_seq; -+ unsigned long head_id; -+ struct prb_desc desc; -+ unsigned long diff; -+ struct prb_desc *d; -+ int err; -+ -+ /* -+ * It may not be possible to read a sequence number for @head_id. -+ * So the ID of @last_finailzed_seq is used to calculate what the -+ * sequence number of @head_id will be. -+ */ -+ -+try_again: -+ last_finalized_seq = desc_last_finalized_seq(rb); -+ -+ /* -+ * @head_id is loaded after @last_finalized_seq to ensure that it is -+ * at or beyond @last_finalized_seq. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_last_finalized_seq:A reads from -+ * desc_update_last_finalized:A, then -+ * prb_next_reserve_seq:A reads from desc_reserve:D. -+ * -+ * Relies on: -+ * -+ * RELEASE from desc_reserve:D to desc_update_last_finalized:A -+ * matching -+ * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A -+ * -+ * Note: desc_reserve:D and desc_update_last_finalized:A can be -+ * different CPUs. However, the desc_update_last_finalized:A CPU -+ * (which performs the release) must have previously seen -+ * desc_read:C, which implies desc_reserve:D can be seen. 
-+ */ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ -+ -+ d = to_desc(desc_ring, last_finalized_seq); -+ state_var = &d->state_var; -+ -+ /* Extract the ID, used to specify the descriptor to read. */ -+ last_finalized_id = DESC_ID(atomic_long_read(state_var)); -+ -+ /* Ensure @last_finalized_id is correct. */ -+ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); -+ -+ if (err == -EINVAL) { -+ if (last_finalized_seq == 0) { -+ /* -+ * @last_finalized_seq still contains its initial -+ * value. Probably no record has been finalized yet. -+ * This means the ringbuffer is not yet full and the -+ * @head_id value can be used directly (subtracting -+ * off the id value corresponding to seq=0). -+ */ -+ -+ /* -+ * Because of hack#2 of the bootstrapping phase, the -+ * @head_id initial value must be handled separately. -+ */ -+ if (head_id == DESC0_ID(desc_ring->count_bits)) -+ return 0; -+ -+ /* -+ * The @head_id is initialized such that the first -+ * increment will yield the first record (seq=0). -+ * Therefore use the initial value +1 as the base to -+ * subtract from @head_id. -+ */ -+ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; -+ } else { -+ /* Record must have been overwritten. Try again. */ -+ goto try_again; -+ } -+ } -+ -+ /* -+ * @diff is the number of records beyond the last record available -+ * to readers. -+ */ -+ diff = head_id - last_finalized_id; -+ -+ /* -+ * @head_id points to the most recently reserved record, but this -+ * function returns the sequence number that will be assigned to the -+ * next (not yet reserved) record. Thus +1 is needed. -+ */ -+ return (last_finalized_seq + diff + 1); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_bh_disable(); ++ else ++ preempt_disable(); +} + - /* -- * Non-blocking read of a record. Updates @seq to the last finalized record -- * (which may have no data available). -+ * Non-blocking read of a record. - * -- * See the description of prb_read_valid() and prb_read_valid_info() -- * for details. -+ * On success @seq is updated to the record that was read and (if provided) -+ * @r and @line_count will contain the read/calculated data. -+ * -+ * On failure @seq is updated to a record that is not yet available to the -+ * reader, but it will be the next record available to the reader. -+ * -+ * Note: When the current CPU is in panic, this function will skip over any -+ * non-existent/non-finalized records in order to allow the panic CPU -+ * to print any and all records that have been finalized. ++static void vfp_unlock(void) ++{ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_bh_enable(); ++ else ++ preempt_enable(); ++} ++ + /* + * Is 'thread's most up to date state stored in this CPUs hardware? + * Must be called from non-preemptible context. +@@ -819,7 +847,7 @@ void kernel_neon_begin(void) + unsigned int cpu; + u32 fpexc; + +- local_bh_disable(); ++ vfp_lock(); + + /* + * Kernel mode NEON is only allowed outside of hardirq context with +@@ -850,7 +878,7 @@ void kernel_neon_end(void) + { + /* Disable the NEON/VFP unit. */ + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); +- local_bh_enable(); ++ vfp_unlock(); + } + EXPORT_SYMBOL(kernel_neon_end); + +-- +2.51.0 + +From 7f4a4a116c0e49b7dcd450d35d4b5d9bf6a0eef5 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 19 May 2023 16:57:30 +0200 +Subject: [PATCH 173/213] ARM: vfp: Use vfp_lock() in vfp_sync_hwstate(). 
+ +vfp_sync_hwstate() uses preempt_disable() followed by local_bh_disable() +to ensure that it won't get interrupted while checking the VFP state. +This harms PREEMPT_RT because softirq handling can get preempted and +local_bh_disable() synchronizes the related section with a sleeping lock +which does not work with disabled preemption. + +Use the vfp_lock() to synchronize the access. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/vfp/vfpmodule.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index 8d321cdb7ac5..3b9360bfc508 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -540,11 +540,9 @@ static inline void vfp_pm_init(void) { } */ - static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, - struct printk_record *r, unsigned int *line_count) -@@ -1899,12 +2140,32 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, - *seq = tail_seq; + void vfp_sync_hwstate(struct thread_info *thread) + { +- unsigned int cpu = get_cpu(); +- +- local_bh_disable(); ++ vfp_lock(); - } else if (err == -ENOENT) { -- /* Record exists, but no data available. Skip. */ -+ /* Record exists, but the data was lost. Skip. */ - (*seq)++; +- if (vfp_state_in_hw(cpu, thread)) { ++ if (vfp_state_in_hw(raw_smp_processor_id(), thread)) { + u32 fpexc = fmrx(FPEXC); - } else { -- /* Non-existent/non-finalized record. Must stop. */ -- return false; -+ /* -+ * Non-existent/non-finalized record. Must stop. -+ * -+ * For panic situations it cannot be expected that -+ * non-finalized records will become finalized. But -+ * there may be other finalized records beyond that -+ * need to be printed for a panic situation. If this -+ * is the panic CPU, skip this -+ * non-existent/non-finalized record unless it is -+ * at or beyond the head, in which case it is not -+ * possible to continue. -+ * -+ * Note that new messages printed on panic CPU are -+ * finalized when we are here. The only exception -+ * might be the last message without trailing newline. -+ * But it would have the sequence number returned -+ * by "prb_next_reserve_seq() - 1". -+ */ -+ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) -+ (*seq)++; -+ else -+ return false; - } + /* +@@ -555,8 +553,7 @@ void vfp_sync_hwstate(struct thread_info *thread) + fmxr(FPEXC, fpexc); } -@@ -1932,7 +2193,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, - * On success, the reader must check r->info.seq to see which record was - * actually read. This allows the reader to detect dropped records. - * -- * Failure means @seq refers to a not yet written record. -+ * Failure means @seq refers to a record not yet available to the reader. - */ - bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, - struct printk_record *r) -@@ -1962,7 +2223,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, - * On success, the reader must check info->seq to see which record meta data - * was actually read. This allows the reader to detect dropped records. - * -- * Failure means @seq refers to a not yet written record. -+ * Failure means @seq refers to a record not yet available to the reader. - */ - bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, - struct printk_info *info, unsigned int *line_count) -@@ -2008,7 +2269,9 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) - * newest sequence number available to readers will be. 
- * - * This provides readers a sequence number to jump to if all currently -- * available records should be skipped. -+ * available records should be skipped. It is guaranteed that all records -+ * previous to the returned value have been finalized and are (or were) -+ * available to the reader. - * - * Context: Any context. - * Return: The sequence number of the next newest (not yet available) record -@@ -2016,34 +2279,19 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) +- local_bh_enable(); +- put_cpu(); ++ vfp_unlock(); + } + + /* Ensure that the thread reloads the hardware VFP state on the next use. */ +-- +2.51.0 + +From 559aec2d62d2f0fb47cdcf307029dae765f46b7b Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Wed, 28 Jun 2023 09:36:10 +0200 +Subject: [PATCH 174/213] ARM: vfp: Use vfp_lock() in vfp_support_entry(). + +vfp_entry() is invoked from exception handler and is fully preemptible. +It uses local_bh_disable() to remain uninterrupted while checking the +VFP state. +This is not working on PREEMPT_RT because local_bh_disable() +synchronizes the relevant section but the context remains fully +preemptible. + +Use vfp_lock() for uninterrupted access. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/vfp/vfpmodule.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index 3b9360bfc508..9543f011d0ed 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -708,7 +708,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) + if (!user_mode(regs)) + return vfp_kmode_exception(regs, trigger); + +- local_bh_disable(); ++ vfp_lock(); + fpexc = fmrx(FPEXC); + + /* +@@ -787,7 +787,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) + if (!(fpscr & FPSCR_IXE)) { + if (!(fpscr & FPSCR_LENGTH_MASK)) { + pr_debug("not VFP\n"); +- local_bh_enable(); ++ vfp_unlock(); + return -ENOEXEC; + } + fpexc |= FPEXC_DEX; +@@ -797,7 +797,7 @@ bounce: regs->ARM_pc += 4; + VFP_bounce(trigger, fpexc, regs); + } + +- local_bh_enable(); ++ vfp_unlock(); + return 0; + } + +-- +2.51.0 + +From 310f0eb20614e662c6fe5c69edce1eb2308d9e8a Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Wed, 28 Jun 2023 09:39:33 +0200 +Subject: [PATCH 175/213] ARM: vfp: Move sending signals outside of + vfp_lock()ed section. + +VFP_bounce() is invoked from within vfp_support_entry() and may send a +signal. Sending a signal uses spinlock_t which becomes a sleeping lock +on PREEMPT_RT and must not be acquired within a preempt-disabled +section. + +Move the vfp_raise_sigfpe() block outside of the vfp_lock() section. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/vfp/vfpmodule.c | 29 ++++++++++++++++++----------- + 1 file changed, 18 insertions(+), 11 deletions(-) + +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index 9543f011d0ed..9fde36fcb80c 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -268,7 +268,7 @@ static void vfp_panic(char *reason, u32 inst) + /* + * Process bitmask of exception conditions. */ - u64 prb_next_seq(struct printk_ringbuffer *rb) +-static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) ++static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr) { -- struct prb_desc_ring *desc_ring = &rb->desc_ring; -- enum desc_state d_state; -- unsigned long id; - u64 seq; + int si_code = 0; -- /* Check if the cached @id still points to a valid @seq. 
*/ -- id = atomic_long_read(&desc_ring->last_finalized_id); -- d_state = desc_read(desc_ring, id, NULL, &seq, NULL); -+ seq = desc_last_finalized_seq(rb); +@@ -276,8 +276,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ -- if (d_state == desc_finalized || d_state == desc_reusable) { -- /* -- * Begin searching after the last finalized record. -- * -- * On 0, the search must begin at 0 because of hack#2 -- * of the bootstrapping phase it is not known if a -- * record at index 0 exists. -- */ -- if (seq != 0) -- seq++; -- } else { -- /* -- * The information about the last finalized sequence number -- * has gone. It should happen only when there is a flood of -- * new messages and the ringbuffer is rapidly recycled. -- * Give up and start from the beginning. -- */ -- seq = 0; -- } -+ /* -+ * Begin searching after the last finalized record. -+ * -+ * On 0, the search must begin at 0 because of hack#2 -+ * of the bootstrapping phase it is not known if a -+ * record at index 0 exists. -+ */ -+ if (seq != 0) -+ seq++; + if (exceptions == VFP_EXCEPTION_ERROR) { + vfp_panic("unhandled bounce", inst); +- vfp_raise_sigfpe(FPE_FLTINV, regs); +- return; ++ return FPE_FLTINV; + } + + /* +@@ -305,8 +304,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ + RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); + RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); + +- if (si_code) +- vfp_raise_sigfpe(si_code, regs); ++ return si_code; + } + + /* +@@ -352,6 +350,8 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) + static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) + { + u32 fpscr, orig_fpscr, fpsid, exceptions; ++ int si_code2 = 0; ++ int si_code = 0; + + pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); + +@@ -397,8 +397,8 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) + * unallocated VFP instruction but with FPSCR.IXE set and not + * on VFP subarch 1. + */ +- vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); +- return; ++ si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr); ++ goto exit; + } + + /* +@@ -422,14 +422,14 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) + */ + exceptions = vfp_emulate_instruction(trigger, fpscr, regs); + if (exceptions) +- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); ++ si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); + + /* + * If there isn't a second FP instruction, exit now. Note that + * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. + */ + if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) +- return; ++ goto exit; + + /* + * The barrier() here prevents fpinst2 being read +@@ -441,7 +441,13 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) + emulate: + exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); + if (exceptions) +- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); ++ si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); ++exit: ++ vfp_unlock(); ++ if (si_code2) ++ vfp_raise_sigfpe(si_code2, regs); ++ if (si_code) ++ vfp_raise_sigfpe(si_code, regs); + } + + static void vfp_enable(void *unused) +@@ -773,6 +779,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) + * replay the instruction that trapped. 
+ */ + fmxr(FPEXC, fpexc); ++ vfp_unlock(); + } else { + /* Check for synchronous or asynchronous exceptions */ + if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { +@@ -794,10 +801,10 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) + } + } + bounce: regs->ARM_pc += 4; ++ /* VFP_bounce() will invoke vfp_unlock() */ + VFP_bounce(trigger, fpexc, regs); + } - /* - * The information about the last finalized @seq might be inaccurate. -@@ -2085,7 +2333,7 @@ void prb_init(struct printk_ringbuffer *rb, - rb->desc_ring.infos = infos; - atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); - atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); -- atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); -+ atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); +- vfp_unlock(); + return 0; + } - rb->text_data_ring.size_bits = textbits; - rb->text_data_ring.data = text_buf; -diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h -index 18cd25e48..52626d0f1 100644 ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -75,7 +75,7 @@ struct prb_desc_ring { - struct printk_info *infos; - atomic_long_t head_id; - atomic_long_t tail_id; -- atomic_long_t last_finalized_id; -+ atomic_long_t last_finalized_seq; - }; +-- +2.51.0 + +From 131efd6b6aa9e61e087e6bdf41a77c2566309a01 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:29 +0200 +Subject: [PATCH 176/213] ARM: Allow to enable RT + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/arm/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index dea47ba6576b..a154ecd2d3c3 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -34,6 +34,7 @@ config ARM + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST +@@ -118,6 +119,7 @@ config ARM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ +-- +2.51.0 + +From a79dae2043883a10af3285e26fabd4325c2cf5a0 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:35 +0200 +Subject: [PATCH 177/213] ARM64: Allow to enable RT + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/arm64/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 4ecba0690938..9dc1dd4b1b6c 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -97,6 +97,7 @@ config ARM64 + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PER_VMA_LOCK ++ select ARCH_SUPPORTS_RT + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT +-- +2.51.0 + +From 56f55ebf40ecd78f31dbaa81d163f9640ca89405 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 26 Jul 2019 11:30:49 +0200 +Subject: [PATCH 178/213] powerpc: traps: Use PREEMPT_RT + +Add PREEMPT_RT to the backtrace if enabled. 
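
[editor's note: a minimal userspace model of the vfp_lock()/vfp_unlock()
pattern introduced by the VFP patches above, not part of the patch series.
The kernel primitives are stubbed with prints; only the shape of the
conditional-locking idiom is taken from the patch. It illustrates why one
helper can use local_bh_disable() on !PREEMPT_RT but plain
preempt_disable() on PREEMPT_RT, where softirqs always run in thread
context:

  #include <stdbool.h>
  #include <stdio.h>

  /* Stand-in for IS_ENABLED(CONFIG_PREEMPT_RT); flip to taste. */
  static const bool preempt_rt = true;

  /* Stubs for the kernel primitives (assumptions, prints only). */
  static void local_bh_disable(void) { puts("BH off: excludes softirq and preemption"); }
  static void local_bh_enable(void)  { puts("BH on"); }
  static void preempt_disable(void)  { puts("preempt off: enough on RT, softirqs are threaded"); }
  static void preempt_enable(void)   { puts("preempt on"); }

  static void vfp_lock(void)
  {
          if (!preempt_rt)
                  local_bh_disable();
          else
                  preempt_disable();
  }

  static void vfp_unlock(void)
  {
          if (!preempt_rt)
                  local_bh_enable();
          else
                  preempt_enable();
  }

  int main(void)
  {
          vfp_lock();
          puts("exclusive access to the (modeled) VFP unit");
          vfp_unlock();
          return 0;
  }

The same structure also explains why patch 175/213 above defers
vfp_raise_sigfpe() until after vfp_unlock(): anything inside the pair must
not take a sleeping lock on RT.]
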
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/powerpc/kernel/traps.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 2de7f6dcd32b..739f5b179a7f 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -261,12 +261,17 @@ static char *get_mmu_str(void) - /* -@@ -127,8 +127,22 @@ enum desc_state { - #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) - #define DESC_ID_MASK (~DESC_FLAGS_MASK) - #define DESC_ID(sv) ((sv) & DESC_ID_MASK) + static int __die(const char *str, struct pt_regs *regs, long err) + { ++ const char *pr = ""; + -+/* -+ * Special data block logical position values (for fields of -+ * @prb_desc.text_blk_lpos). -+ * -+ * - Bit0 is used to identify if the record has no data block. (Implemented in -+ * the LPOS_DATALESS() macro.) -+ * -+ * - Bit1 specifies the reason for not having a data block. -+ * -+ * These special values could never be real lpos values because of the -+ * meta data and alignment padding of data blocks. (See to_blk_size() for -+ * details.) -+ */ - #define FAILED_LPOS 0x1 --#define NO_LPOS 0x3 -+#define EMPTY_LINE_LPOS 0x3 - - #define FAILED_BLK_LPOS \ - { \ -@@ -259,7 +273,7 @@ static struct printk_ringbuffer name = { \ - .infos = &_##name##_infos[0], \ - .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ - .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -- .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ .last_finalized_seq = ATOMIC_INIT(0), \ - }, \ - .text_data_ring = { \ - .size_bits = (avgtextbits) + (descbits), \ -@@ -378,7 +392,41 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, - bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, - struct printk_info *info, unsigned int *line_count); + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -+u64 prb_first_seq(struct printk_ringbuffer *rb); - u64 prb_first_valid_seq(struct printk_ringbuffer *rb); - u64 prb_next_seq(struct printk_ringbuffer *rb); -+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); -+ -+#ifdef CONFIG_64BIT -+ -+#define __u64seq_to_ulseq(u64seq) (u64seq) -+#define __ulseq_to_u64seq(rb, ulseq) (ulseq) -+ -+#else /* CONFIG_64BIT */ -+ -+#define __u64seq_to_ulseq(u64seq) ((u32)u64seq) -+ -+static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) -+{ -+ u64 rb_first_seq = prb_first_seq(rb); -+ u64 seq; -+ -+ /* -+ * The provided sequence is only the lower 32 bits of the ringbuffer -+ * sequence. It needs to be expanded to 64bit. Get the first sequence -+ * number from the ringbuffer and fold it. -+ * -+ * Having a 32bit representation in the console is sufficient. -+ * If a console ever gets more than 2^31 records behind -+ * the ringbuffer then this is the least of the problems. -+ * -+ * Also the access to the ring buffer is always safe. -+ */ -+ seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); -+ -+ return seq; -+} ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; + -+#endif /* CONFIG_64BIT */ - - #endif /* _KERNEL_PRINTK_RINGBUFFER_H */ -diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c -index 6d10927a0..8d9408d65 100644 ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -26,6 +26,18 @@ void __printk_safe_exit(void) - this_cpu_dec(printk_context); + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? 
"LE" : "BE", + PAGE_SIZE / 1024, get_mmu_str(), +- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", ++ pr, + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", +-- +2.51.0 + +From a165bd69aaf29884ed53637055c9b64a5c9e7068 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Tue, 26 Mar 2019 18:31:54 +0100 +Subject: [PATCH 179/213] powerpc/pseries/iommu: Use a locallock instead + local_irq_save() + +The locallock protects the per-CPU variable tce_page. The function +attempts to allocate memory while tce_page is protected (by disabling +interrupts). + +Use local_irq_save() instead of local_irq_disable(). + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/powerpc/platforms/pseries/iommu.c | 31 +++++++++++++++++--------- + 1 file changed, 20 insertions(+), 11 deletions(-) + +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index bf02f94a973d..eb4bd4b6edc4 100644 +--- a/arch/powerpc/platforms/pseries/iommu.c ++++ b/arch/powerpc/platforms/pseries/iommu.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -206,7 +207,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + return ret; } -+void __printk_deferred_enter(void) -+{ -+ cant_migrate(); -+ this_cpu_inc(printk_context); -+} -+ -+void __printk_deferred_exit(void) -+{ -+ cant_migrate(); -+ this_cpu_dec(printk_context); -+} -+ - asmlinkage int vprintk(const char *fmt, va_list args) - { - #ifdef CONFIG_KGDB_KDB -diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c -index 46612fb15..f893f4cfd 100644 ---- a/kernel/rcu/rcutorture.c -+++ b/kernel/rcu/rcutorture.c -@@ -2409,6 +2409,12 @@ static int rcutorture_booster_init(unsigned int cpu) - WARN_ON_ONCE(!t); - sp.sched_priority = 2; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); -+#ifdef CONFIG_PREEMPT_RT -+ t = per_cpu(timersd, cpu); -+ WARN_ON_ONCE(!t); -+ sp.sched_priority = 2; -+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); -+#endif +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; + + static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, +@@ -229,9 +236,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + direction, attrs); } - /* Don't allow time recalculation while creating a new task. 
*/ -diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h -index 11a1fac3a..6f085a159 100644 ---- a/kernel/rcu/tree_stall.h -+++ b/kernel/rcu/tree_stall.h -@@ -8,6 +8,7 @@ - */ +- local_irq_save(flags); /* to protect tcep and the page behind it */ ++ /* to protect tcep and the page behind it */ ++ local_lock_irqsave(&tce_page.lock, flags); - #include -+#include +- tcep = __this_cpu_read(tce_page); ++ tcep = __this_cpu_read(tce_page.page); - ////////////////////////////////////////////////////////////////////////////// - // -@@ -604,6 +605,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) - if (rcu_stall_is_suppressed()) - return; + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() +@@ -240,12 +248,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, + npages, uaddr, direction, attrs); + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } -+ nbcon_cpu_emergency_enter(); -+ - /* - * OK, time to rat on our buddy... - * See Documentation/RCU/stallwarn.rst for info on how to debug -@@ -658,6 +661,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) - panic_on_rcu_stall(); + rpn = __pa(uaddr) >> tceshift; +@@ -275,7 +283,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcenum += limit; + } while (npages > 0 && !rc); - rcu_force_quiescent_state(); /* Kick them all. */ -+ -+ nbcon_cpu_emergency_exit(); - } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; +@@ -459,16 +467,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + DMA_BIDIRECTIONAL, 0); + } + +- local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); ++ /* to protect tcep and the page behind it */ ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); - static void print_cpu_stall(unsigned long gps) -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 8c5f75af0..7deb3d1fe 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -899,14 +899,15 @@ static inline void hrtick_rq_init(struct rq *rq) + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + if (!tcep) { +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return -ENOMEM; + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } - #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) - /* -- * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * Atomically set TIF_NEED_RESCHED[_LAZY] and test for TIF_POLLING_NRFLAG, - * this avoids any races wrt polling state changes and thereby avoids - * spurious IPIs. 
- */ --static inline bool set_nr_and_not_polling(struct task_struct *p) -+static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit) - { - struct thread_info *ti = task_thread_info(p); -- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+ -+ return !(fetch_or(&ti->flags, 1 << tif_bit) & _TIF_POLLING_NRFLAG); - } + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -511,7 +520,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, - /* -@@ -923,7 +924,7 @@ static bool set_nr_if_polling(struct task_struct *p) - for (;;) { - if (!(val & _TIF_POLLING_NRFLAG)) - return false; -- if (val & _TIF_NEED_RESCHED) -+ if (val & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - return true; - if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED)) - break; -@@ -932,9 +933,9 @@ static bool set_nr_if_polling(struct task_struct *p) - } + /* error cleanup: caller will clear whole range */ - #else --static inline bool set_nr_and_not_polling(struct task_struct *p) -+static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit) - { -- set_tsk_need_resched(p); -+ set_tsk_thread_flag(p, tif_bit); - return true; +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return rc; } -@@ -1040,28 +1041,47 @@ void wake_up_q(struct wake_q_head *head) - * might also involve a cross-CPU call to trigger the scheduler on - * the target CPU. +-- +2.51.0 + +From 5d747147d332237d973f0b75c337b2cd090656b1 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 9 Mar 2023 09:13:52 +0100 +Subject: [PATCH 180/213] powerpc/pseries: Select the generic memory allocator. + +The RTAS work area allocator is using the generic memory allocator and +as such it must select it. + +Select the generic memory allocator on pseries. + +Fixes: 43033bc62d349 ("powerpc/pseries: add RTAS work area allocator") +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/20230309135110.uAxhqRFk@linutronix.de +--- + arch/powerpc/platforms/pseries/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig +index 4ebf2ef2845d..381c3be3bec1 100644 +--- a/arch/powerpc/platforms/pseries/Kconfig ++++ b/arch/powerpc/platforms/pseries/Kconfig +@@ -2,6 +2,7 @@ + config PPC_PSERIES + depends on PPC64 && PPC_BOOK3S + bool "IBM pSeries & new (POWER5-based) iSeries" ++ select GENERIC_ALLOCATOR + select HAVE_PCSPKR_PLATFORM + select MPIC + select OF_DYNAMIC +-- +2.51.0 + +From 24d1a081d546e97028490fec3b69fb3481651920 Mon Sep 17 00:00:00 2001 +From: Bogdan Purcareata +Date: Fri, 24 Apr 2015 15:53:13 +0000 +Subject: [PATCH 181/213] powerpc/kvm: Disable in-kernel MPIC emulation for + PREEMPT_RT + +While converting the openpic emulation code to use a raw_spinlock_t enables +guests to run on RT, there's still a performance issue. For interrupts sent in +directed delivery mode with a multiple CPU mask, the emulated openpic will loop +through all of the VCPUs, and for each VCPUs, it call IRQ_check, which will loop +through all the pending interrupts for that VCPU. This is done while holding the +raw_lock, meaning that in all this time the interrupts and preemption are +disabled on the host Linux. A malicious user app can max both these number and +cause a DoS. + +This temporary fix is sent for two reasons. 
First is so that users who want to +use the in-kernel MPIC emulation are aware of the potential latencies, thus +making sure that the hardware MPIC and their usage scenario does not involve +interrupts sent in directed delivery mode, and the number of possible pending +interrupts is kept small. Secondly, this should incentivize the development of a +proper openpic emulation that would be better suited for RT. + +Acked-by: Scott Wood +Signed-off-by: Bogdan Purcareata +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/powerpc/kvm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig +index 902611954200..2f188137f830 100644 +--- a/arch/powerpc/kvm/Kconfig ++++ b/arch/powerpc/kvm/Kconfig +@@ -224,6 +224,7 @@ config KVM_E500MC + config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && PPC_E500 ++ depends on !PREEMPT_RT + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_IRQ_ROUTING +-- +2.51.0 + +From e56c5fd24d51a553206660bbf56be2ad68c51ab3 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Tue, 26 Mar 2019 18:31:29 +0100 +Subject: [PATCH 182/213] powerpc/stackprotector: work around stack-guard init + from atomic + +This is invoked from the secondary CPU in atomic context. On x86 we use +tsc instead. On Power we XOR it against mftb() so lets use stack address +as the initial value. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/powerpc/include/asm/stackprotector.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h +index 283c34647856..4727f40052dd 100644 +--- a/arch/powerpc/include/asm/stackprotector.h ++++ b/arch/powerpc/include/asm/stackprotector.h +@@ -19,8 +19,13 @@ */ --void resched_curr(struct rq *rq) -+static void __resched_curr(struct rq *rq, int lazy) + static __always_inline void boot_init_stack_canary(void) { -+ int cpu, tif_bit = TIF_NEED_RESCHED + lazy; - struct task_struct *curr = rq->curr; -- int cpu; - - lockdep_assert_rq_held(rq); +- unsigned long canary = get_random_canary(); ++ unsigned long canary; -- if (test_tsk_need_resched(curr)) -+ if (unlikely(test_tsk_thread_flag(curr, tif_bit))) - return; ++#ifndef CONFIG_PREEMPT_RT ++ canary = get_random_canary(); ++#else ++ canary = ((unsigned long)&canary) & CANARY_MASK; ++#endif + current->stack_canary = canary; + #ifdef CONFIG_PPC64 + get_paca()->canary = canary; +-- +2.51.0 + +From b12df484286b2f1edb3579a8abddf80d3dfebac4 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:41 +0200 +Subject: [PATCH 183/213] POWERPC: Allow to enable RT + +Allow to select RT. 
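
[editor's note: the pseries/iommu change above (patch 179/213) shows the
standard RT conversion for per-CPU scratch data: bundle the datum with a
local_lock_t so the protection is a named lock rather than a blanket
local_irq_save(). A compilable userspace analogue, with pthread mutexes
standing in for local_lock_t and an array index standing in for the
current CPU, is sketched below; all names are illustrative:

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  enum { NR_CPUS = 4 };

  /* The protected datum and its lock travel together. */
  struct tce_page {
          long *page;
          pthread_mutex_t lock;
  };

  static struct tce_page tce_page[NR_CPUS];

  static void build_tces(int cpu)
  {
          pthread_mutex_lock(&tce_page[cpu].lock);    /* models local_lock_irqsave() */
          if (!tce_page[cpu].page)                    /* lazy per-CPU buffer, as in the patch */
                  tce_page[cpu].page = calloc(512, sizeof(long));
          if (tce_page[cpu].page)
                  tce_page[cpu].page[0] = 0x42;       /* fill TCEs... */
          pthread_mutex_unlock(&tce_page[cpu].lock);  /* models local_unlock_irqrestore() */
  }

  int main(void)
  {
          for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                  pthread_mutex_init(&tce_page[cpu].lock, NULL);
                  build_tces(cpu);
          }
          if (tce_page[0].page)
                  printf("first TCE word: %ld\n", tce_page[0].page[0]);
          return 0;
  }

Note the fallback in the real patch: if the GFP_ATOMIC allocation fails,
the lock is dropped first and the code falls back to the unbatched
tce_build_pSeriesLP() path, so nothing can sleep under the lock.]
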
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +--- + arch/powerpc/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 6baa8b85601a..cca3f7ca6f10 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -166,6 +166,7 @@ config PPC + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_MEMTEST +@@ -268,6 +269,7 @@ config PPC + select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 + select HAVE_SOFTIRQ_ON_OWN_STACK +-- +2.51.0 + +From 7a4f6ef1e3301998bb30741a77051424c9725bff Mon Sep 17 00:00:00 2001 +From: Evan Green +Date: Mon, 6 Nov 2023 14:58:55 -0800 +Subject: [PATCH 184/213] RISC-V: Probe misaligned access speed in parallel + +Probing for misaligned access speed takes about 0.06 seconds. On a +system with 64 cores, doing this in smp_callin() means it's done +serially, extending boot time by 3.8 seconds. That's a lot of boot time. + +Instead of measuring each CPU serially, let's do the measurements on +all CPUs in parallel. If we disable preemption on all CPUs, the +jiffies stop ticking, so we can do this in stages of 1) everybody +except core 0, then 2) core 0. The allocations are all done outside of +on_each_cpu() to avoid calling alloc_pages() with interrupts disabled. + +For hotplugged CPUs that come in after the boot time measurement, +register CPU hotplug callbacks, and do the measurement there. Interrupts +are enabled in those callbacks, so they're fine to do alloc_pages() in. + +[bigeasy: merge the individual patches into the final step.] 
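
[editor's note: a userspace sketch of the scheme described above, not the
kernel implementation: allocate every worker's buffer up front in a
context that may sleep, then run the measurement on all workers in
parallel (the kernel uses on_each_cpu() plus smp_call_on_cpu() for CPU 0).
Worker count and sizes are arbitrary stand-ins:

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  enum { NWORKERS = 4, BUFSZ = 0x4000 };

  struct work { int id; unsigned char *buf; };

  static void *measure(void *arg)
  {
          struct work *w = arg;

          /* Misaligned copy, like the probe: dst off by 1, src off by 3. */
          memcpy(w->buf + 1, w->buf + BUFSZ / 2 + 3, BUFSZ / 2 - 0x80);
          printf("worker %d measured using its own buffer\n", w->id);
          return NULL;
  }

  int main(void)
  {
          pthread_t tid[NWORKERS];
          struct work w[NWORKERS];

          for (int i = 0; i < NWORKERS; i++) {    /* sleepable: do all allocations here */
                  w[i].id = i;
                  w[i].buf = malloc(BUFSZ);
                  if (!w[i].buf)
                          return 1;
          }
          for (int i = 0; i < NWORKERS; i++)      /* then measure in parallel */
                  pthread_create(&tid[i], NULL, measure, &w[i]);
          for (int i = 0; i < NWORKERS; i++) {
                  pthread_join(tid[i], NULL);
                  free(w[i].buf);
          }
          return 0;
  }

The one-buffer-per-worker layout mirrors the patch's own comment about
keeping CPUs from fighting over cache lines.]
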
+ +Reported-by: Jisheng Zhang +Closes: https://lore.kernel.org/all/mhng-9359993d-6872-4134-83ce-c97debe1cf9a@palmer-ri-x1c9/T/#mae9b8f40016f9df428829d33360144dc5026bcbf +Fixes: 584ea6564bca ("RISC-V: Probe for unaligned access speed") +Signed-off-by: Evan Green +Link: https://lore.kernel.org/r/20231106225855.3121724-1-evan@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sebastian Andrzej Siewior + +Conflicts: + arch/riscv/include/asm/cpufeature.h +--- + arch/riscv/kernel/cpufeature.c | 84 ++++++++++++++++++++++++++++++---- + arch/riscv/kernel/smpboot.c | 1 - + 2 files changed, 76 insertions(+), 9 deletions(-) + +diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c +index 3f0ad09e1650..eccb96c48931 100644 +--- a/arch/riscv/kernel/cpufeature.c ++++ b/arch/riscv/kernel/cpufeature.c +@@ -8,6 +8,7 @@ - cpu = cpu_of(rq); + #include + #include ++#include + #include + #include + #include +@@ -30,6 +31,7 @@ - if (cpu == smp_processor_id()) { -- set_tsk_need_resched(curr); -- set_preempt_need_resched(); -+ set_tsk_thread_flag(curr, tif_bit); -+ if (!lazy) -+ set_preempt_need_resched(); - return; - } + #define MISALIGNED_ACCESS_JIFFIES_LG2 1 + #define MISALIGNED_BUFFER_SIZE 0x4000 ++#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) + #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) -- if (set_nr_and_not_polling(curr)) -- smp_send_reschedule(cpu); -- else -+ if (set_nr_and_not_polling(curr, tif_bit)) { -+ if (!lazy) -+ smp_send_reschedule(cpu); -+ } else { - trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void resched_curr(struct rq *rq) -+{ -+ __resched_curr(rq, 0); -+} -+ -+void resched_curr_lazy(struct rq *rq) -+{ -+ int lazy = IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && !sched_feat(FORCE_NEED_RESCHED) ? -+ TIF_NEED_RESCHED_LAZY_OFFSET : 0; -+ -+ if (lazy && unlikely(test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED))) -+ return; -+ -+ __resched_curr(rq, lazy); + unsigned long elf_hwcap __read_mostly; +@@ -575,14 +577,15 @@ unsigned long riscv_get_elf_hwcap(void) + return hwcap; } - void resched_cpu(int cpu) -@@ -1134,7 +1154,7 @@ static void wake_up_idle_cpu(int cpu) - if (cpu == smp_processor_id()) - return; - -- if (set_nr_and_not_polling(rq->idle)) -+ if (set_nr_and_not_polling(rq->idle, TIF_NEED_RESCHED)) - smp_send_reschedule(cpu); - else - trace_sched_wake_idle_without_ipi(cpu); -@@ -6723,10 +6743,14 @@ void __noreturn do_task_dead(void) - - static inline void sched_submit_work(struct task_struct *tsk) +-void check_unaligned_access(int cpu) ++static int check_unaligned_access(void *param) { -+ static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG); - unsigned int task_flags; ++ int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; +- struct page *page; ++ struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; +@@ -649,7 +652,7 @@ void check_unaligned_access(int cpu) + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); -- if (task_is_running(tsk)) -- return; -+ /* -+ * Establish LD_WAIT_CONFIG context to ensure none of the code called -+ * will use a blocking primitive -- which would lead to recursion. -+ */ -+ lock_map_acquire_try(&sched_map); +- goto out; ++ return 0; + } - task_flags = tsk->flags; - /* -@@ -6752,6 +6776,8 @@ static inline void sched_submit_work(struct task_struct *tsk) - * make sure to submit it to avoid deadlocks. 
- */ - blk_flush_plug(tsk->plug, true); -+ -+ lock_map_release(&sched_map); - } + if (word_cycles < byte_cycles) +@@ -663,18 +666,83 @@ void check_unaligned_access(int cpu) + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - static void sched_update_worker(struct task_struct *tsk) -@@ -6764,16 +6790,26 @@ static void sched_update_worker(struct task_struct *tsk) - } - } + per_cpu(misaligned_access_speed, cpu) = speed; ++ return 0; ++} --asmlinkage __visible void __sched schedule(void) -+static __always_inline void __schedule_loop(unsigned int sched_mode) - { -- struct task_struct *tsk = current; -- -- sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(SM_NONE); -+ __schedule(sched_mode); - sched_preempt_enable_no_resched(); - } while (need_resched()); +-out: +- __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); ++static void check_unaligned_access_nonboot_cpu(void *param) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct page **pages = param; ++ ++ if (smp_processor_id() != 0) ++ check_unaligned_access(pages[cpu]); +} + -+asmlinkage __visible void __sched schedule(void) ++static int riscv_online_cpu(unsigned int cpu) +{ -+ struct task_struct *tsk = current; ++ static struct page *buf; + -+#ifdef CONFIG_RT_MUTEXES -+ lockdep_assert(!tsk->sched_rt_mutex); -+#endif ++ /* We are already set since the last check */ ++ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) ++ return 0; + -+ if (!task_is_running(tsk)) -+ sched_submit_work(tsk); -+ __schedule_loop(SM_NONE); - sched_update_worker(tsk); - } - EXPORT_SYMBOL(schedule); -@@ -6837,11 +6873,7 @@ void __sched schedule_preempt_disabled(void) - #ifdef CONFIG_PREEMPT_RT - void __sched notrace schedule_rtlock(void) - { -- do { -- preempt_disable(); -- __schedule(SM_RTLOCK_WAIT); -- sched_preempt_enable_no_resched(); -- } while (need_resched()); -+ __schedule_loop(SM_RTLOCK_WAIT); ++ buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); ++ if (!buf) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ return -ENOMEM; ++ } ++ ++ check_unaligned_access(buf); ++ __free_pages(buf, MISALIGNED_BUFFER_ORDER); ++ return 0; } - NOKPROBE_SYMBOL(schedule_rtlock); - #endif -@@ -7037,6 +7069,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio) - #ifdef CONFIG_RT_MUTEXES - -+/* -+ * Would be more useful with typeof()/auto_type but they don't mix with -+ * bit-fields. Since it's a local thing, use int. Keep the generic sounding -+ * name such that if someone were to implement this function we get to compare -+ * notes. -+ */ -+#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; }) +-static int check_unaligned_access_boot_cpu(void) ++/* Measure unaligned access on all CPUs present at boot in parallel. */ ++static int check_unaligned_access_all_cpus(void) + { +- check_unaligned_access(0); ++ unsigned int cpu; ++ unsigned int cpu_count = num_possible_cpus(); ++ struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), ++ GFP_KERNEL); + -+void rt_mutex_pre_schedule(void) -+{ -+ lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1)); -+ sched_submit_work(current); -+} ++ if (!bufs) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ return 0; ++ } + -+void rt_mutex_schedule(void) -+{ -+ lockdep_assert(current->sched_rt_mutex); -+ __schedule_loop(SM_NONE); -+} ++ /* ++ * Allocate separate buffers for each CPU so there's no fighting over ++ * cache lines. 
++ */ ++ for_each_cpu(cpu, cpu_online_mask) { ++ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); ++ if (!bufs[cpu]) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ goto out; ++ } ++ } + -+void rt_mutex_post_schedule(void) -+{ -+ sched_update_worker(current); -+ lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0)); -+} ++ /* Check everybody except 0, who stays behind to tend jiffies. */ ++ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + - static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) ++ /* Check core 0. */ ++ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); ++ ++ /* Setup hotplug callback for any new CPUs that come online. */ ++ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", ++ riscv_online_cpu, NULL); ++ ++out: ++ for_each_cpu(cpu, cpu_online_mask) { ++ if (bufs[cpu]) ++ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); ++ } ++ ++ kfree(bufs); + return 0; + } + +-arch_initcall(check_unaligned_access_boot_cpu); ++arch_initcall(check_unaligned_access_all_cpus); + + void riscv_user_isa_enable(void) { - if (pi_task) -@@ -8895,6 +8953,21 @@ static inline void preempt_dynamic_init(void) { } +diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c +index d1b0a6fc3adf..d162bf339beb 100644 +--- a/arch/riscv/kernel/smpboot.c ++++ b/arch/riscv/kernel/smpboot.c +@@ -248,7 +248,6 @@ asmlinkage __visible void smp_callin(void) - #endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ + numa_add_cpu(curr_cpuid); + set_cpu_online(curr_cpuid, 1); +- check_unaligned_access(curr_cpuid); -+/* -+ * task_is_pi_boosted - Check if task has been PI boosted. -+ * @p: Task to check. -+ * -+ * Return true if task is subject to priority inheritance. -+ */ -+bool task_is_pi_boosted(const struct task_struct *p) -+{ -+ int prio = p->prio; -+ -+ if (!rt_prio(prio)) -+ return false; -+ return prio != p->normal_prio; -+} -+ - /** - * yield - yield the current processor to other threads. - * -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 115e266db..90028c5ba 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -333,6 +333,23 @@ static const struct file_operations sched_debug_fops = { - .release = seq_release, - }; + if (has_vector()) { + if (riscv_v_setup_vsize()) +-- +2.51.0 + +From d9413ad547f0cdd4897ac6428b36eaa9b8166da9 Mon Sep 17 00:00:00 2001 +From: Jisheng Zhang +Date: Tue, 31 Oct 2023 22:35:20 +0800 +Subject: [PATCH 185/213] riscv: add PREEMPT_AUTO support + +riscv has switched to GENERIC_ENTRY, so adding PREEMPT_AUTO is as simple +as adding TIF_ARCH_RESCHED_LAZY related definitions and enabling +HAVE_PREEMPT_AUTO. 
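
[editor's note: the TIF_FOO vs. _TIF_FOO convention used above is a
classic trap: TIF_FOO is a bit number, _TIF_FOO is the shifted mask. The
kvm fix later in this series ("Use the proper mask to check for the LAZY
bit") repairs exactly this mix-up. A self-contained demonstration:

  #include <stdio.h>

  #define TIF_ARCH_RESCHED_LAZY   0                               /* bit number */
  #define TIF_NEED_RESCHED        3
  #define _TIF_ARCH_RESCHED_LAZY  (1u << TIF_ARCH_RESCHED_LAZY)   /* mask */
  #define _TIF_NEED_RESCHED       (1u << TIF_NEED_RESCHED)

  int main(void)
  {
          unsigned int ti_work = _TIF_ARCH_RESCHED_LAZY;  /* lazy bit set */

          if (ti_work & TIF_ARCH_RESCHED_LAZY)            /* wrong: & 0 */
                  puts("never printed: bit number used as a mask");

          if (ti_work & _TIF_ARCH_RESCHED_LAZY)           /* right */
                  puts("lazy reschedule requested");
          return 0;
  }
]
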
+ +Signed-off-by: Jisheng Zhang +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/riscv/Kconfig | 1 + + arch/riscv/include/asm/thread_info.h | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index 1304992232ad..f05883adb5f7 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -136,6 +136,7 @@ config RISCV + select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL ++ select HAVE_PREEMPT_AUTO + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RETHOOK if !XIP_KERNEL + select HAVE_RSEQ +diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h +index 8c72d1bcdf14..c59ad9b7af11 100644 +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -86,6 +86,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); + * - pending work-to-be-done flags are in lowest half-word + * - other flags in upper half-word(s) + */ ++#define TIF_ARCH_RESCHED_LAZY 0 /* Lazy rescheduling */ + #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ + #define TIF_SIGPENDING 2 /* signal pending */ + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +@@ -100,6 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) + #define _TIF_UPROBE (1 << TIF_UPROBE) ++#define _TIF_ARCH_RESCHED_LAZY (1 << TIF_ARCH_RESCHED_LAZY) -+static ssize_t sched_hog_write(struct file *filp, const char __user *ubuf, -+ size_t cnt, loff_t *ppos) + #define _TIF_WORK_MASK \ + (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +-- +2.51.0 + +From 97d96d3b1c0df78b4d46b8d0997496d99c414836 Mon Sep 17 00:00:00 2001 +From: Jisheng Zhang +Date: Tue, 31 Oct 2023 22:35:21 +0800 +Subject: [PATCH 186/213] riscv: allow to enable RT + +Now, it's ready to enable RT on riscv. + +Signed-off-by: Jisheng Zhang +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/riscv/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index f05883adb5f7..391adf8ffc8d 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -49,6 +49,7 @@ config RISCV + select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU ++ select ARCH_SUPPORTS_RT + select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USES_CFI_TRAPS if CFI_CLANG +-- +2.51.0 + +From 688d62d856c2ee1616d2767e8e9f9c8bfb32aa85 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Sat, 30 Jul 2011 21:55:53 -0500 +Subject: [PATCH 187/213] sysfs: Add /sys/kernel/realtime entry + +Add a /sys/kernel entry to indicate that the kernel is a +realtime kernel. + +Clark says that he needs this for udev rules, udev needs to evaluate +if its a PREEMPT_RT kernel a few thousand times and parsing uname +output is too slow or so. + +Are there better solutions? Should it exist and return 0 on !-rt? 
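
[editor's note: the consumer side of the new attribute, for the udev-style
use case described above. A missing file is treated as "not PREEMPT_RT",
matching the fact that the patch only creates the entry when
CONFIG_PREEMPT_RT is set, so from userspace this also answers the
"return 0 on !-rt" question:

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/kernel/realtime", "r");
          int rt = 0;

          if (f) {
                  if (fscanf(f, "%d", &rt) != 1)
                          rt = 0;
                  fclose(f);
          }
          printf("PREEMPT_RT kernel: %s\n", rt ? "yes" : "no");
          return 0;
  }
]
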
+ +Signed-off-by: Clark Williams +Signed-off-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +--- + kernel/ksysfs.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c +index 1d4bc493b2f4..486c68c11bbe 100644 +--- a/kernel/ksysfs.c ++++ b/kernel/ksysfs.c +@@ -179,6 +179,15 @@ KERNEL_ATTR_RO(crash_elfcorehdr_size); + + #endif /* CONFIG_CRASH_CORE */ + ++#if defined(CONFIG_PREEMPT_RT) ++static ssize_t realtime_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) +{ -+ unsigned long end = jiffies + 60 * HZ; -+ -+ for (; time_before(jiffies, end) && !signal_pending(current);) -+ cpu_relax(); -+ -+ return cnt; ++ return sprintf(buf, "%d\n", 1); +} ++KERNEL_ATTR_RO(realtime); ++#endif + -+static const struct file_operations sched_hog_fops = { -+ .write = sched_hog_write, -+ .open = simple_open, -+ .llseek = default_llseek, -+}; -+ - static struct dentry *debugfs_sched; + /* whether file capabilities are enabled */ + static ssize_t fscaps_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +@@ -274,6 +283,9 @@ static struct attribute * kernel_attrs[] = { + #ifndef CONFIG_TINY_RCU + &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, ++#endif ++#ifdef CONFIG_PREEMPT_RT ++ &realtime_attr.attr, + #endif + NULL + }; +-- +2.51.0 + +From dc39ecf7e8116b88aed2bc6cd0963fffdcd399d2 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Mon, 5 Feb 2024 14:04:38 -0600 +Subject: [PATCH 188/213] arch/riscv: check_unaligned_acces(): don't alloc page + for check + +Drop the alloc_pages() call since the page is passed in as +a parameter and the alloced page will not be freed. + +Reported-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams +--- + arch/riscv/kernel/cpufeature.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c +index eccb96c48931..c3cdcad4f324 100644 +--- a/arch/riscv/kernel/cpufeature.c ++++ b/arch/riscv/kernel/cpufeature.c +@@ -594,12 +594,6 @@ static int check_unaligned_access(void *param) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return; - static __init int sched_init_debug(void) -@@ -374,6 +391,8 @@ static __init int sched_init_debug(void) +- page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); +- if (!page) { +- pr_warn("Can't alloc pages to measure memcpy performance"); +- return; +- } +- + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ +-- +2.51.0 + +From 67a5562721d9ac20adce9a2f72f45badf3630adb Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Tue, 6 Feb 2024 12:17:19 -0600 +Subject: [PATCH 189/213] Revert "preempt: Put preempt_enable() within an + instrumentation*() section." + +This reverts commit cc3d27d9fdeddcb82db3ea176a44a5509e70eb1c. 
+ +This code was fixed in 6.6 stable so no need for it in the RT series + +Reported-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams +--- + include/linux/preempt.h | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index cd16f0330fba..9aa6358a1a16 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -230,21 +230,15 @@ do { \ + #define preempt_enable() \ + do { \ + barrier(); \ +- if (unlikely(preempt_count_dec_and_test())) { \ +- instrumentation_begin(); \ ++ if (unlikely(preempt_count_dec_and_test())) \ + __preempt_schedule(); \ +- instrumentation_end(); \ +- } \ + } while (0) + + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +- if (unlikely(__preempt_count_dec_and_test())) { \ +- instrumentation_begin(); \ ++ if (unlikely(__preempt_count_dec_and_test())) \ + __preempt_schedule_notrace(); \ +- instrumentation_end(); \ +- } \ + } while (0) + + #define preempt_check_resched() \ +-- +2.51.0 + +From 85961733bbeab34bc39a85810b8405e9294869f5 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 8 Jul 2011 20:25:16 +0200 +Subject: [PATCH 190/213] Add localversion for -RT release + +Signed-off-by: Thomas Gleixner +--- + localversion-rt | 1 + + 1 file changed, 1 insertion(+) + create mode 100644 localversion-rt + +diff --git a/localversion-rt b/localversion-rt +new file mode 100644 +index 000000000000..483ad771f201 +--- /dev/null ++++ b/localversion-rt +@@ -0,0 +1 @@ ++-rt19 +-- +2.51.0 + +From a2d09e65f22dd4312c4aaa86d5016f54b141d00e Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Tue, 27 Feb 2024 17:13:28 -0600 +Subject: [PATCH 191/213] Linux 6.6.18-rt23 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 483ad771f201..9a218ca23053 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt19 ++-rt23 +-- +2.51.0 + +From b2375bba8e5b34bb0f029b7598bf3e25d6ca3c5b Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Tue, 23 Jan 2024 12:56:21 +0100 +Subject: [PATCH 192/213] arm: Disable FAST_GUP on PREEMPT_RT if HIGHPTE is + also enabled. + +gup_pgd_range() is invoked with disabled interrupts and invokes +__kmap_local_page_prot() via pte_offset_map(), gup_p4d_range(). +With HIGHPTE enabled, __kmap_local_page_prot() invokes kmap_high_get() +which uses a spinlock_t via lock_kmap_any(). This leads to an +sleeping-while-atomic error on PREEMPT_RT because spinlock_t becomes a +sleeping lock and must not be acquired in atomic context. + +The loop in map_new_virtual() uses wait_queue_head_t for wake up which +also is using a spinlock_t. + +Limit HAVE_FAST_GUP additionaly to remain disabled on PREEMPT_RT with +HIGHPTE enabled. 
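
[editor's note: the rule this Kconfig limit enforces can be modeled in a
few lines of userspace C, as a toy version of what
CONFIG_DEBUG_ATOMIC_SLEEP reports on a real kernel; nothing below is
kernel API:

  #include <assert.h>
  #include <stdio.h>

  static _Thread_local int atomic_depth;  /* models IRQs-off / non-preemptible depth */

  static void atomic_begin(void) { atomic_depth++; }   /* e.g. gup_pgd_range() */
  static void atomic_end(void)   { atomic_depth--; }

  static void sleeping_lock(void)
  {
          /* spinlock_t on PREEMPT_RT may sleep, so it must never be
           * taken inside an atomic section: */
          assert(atomic_depth == 0 && "sleeping lock in atomic context");
  }

  int main(void)
  {
          sleeping_lock();        /* fine: preemptible context */
          puts("ok outside the atomic section");

          atomic_begin();         /* IRQs off, as in fast GUP */
          /* sleeping_lock();        would trip the assert, like
           *                         kmap_high_get() with HIGHPTE */
          atomic_end();
          return 0;
  }
]
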
+ +Signed-off-by: Sebastian Andrzej Siewior +(cherry picked from commit 02cf5a345530b4d3a94093f0b5c784701c2e7c6a) +Signed-off-by: Clark Williams +--- + arch/arm/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index a154ecd2d3c3..e5a283851c02 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -97,7 +97,7 @@ config ARM + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU + select HAVE_EXIT_THREAD +- select HAVE_FAST_GUP if ARM_LPAE ++ select HAVE_FAST_GUP if ARM_LPAE && !(PREEMPT_RT && HIGHPTE) + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_ERROR_INJECTION + select HAVE_FUNCTION_GRAPH_TRACER +-- +2.51.0 + +From 2d8e23222bed52fb2e12065de18ba8761d048ef5 Mon Sep 17 00:00:00 2001 +From: Junxiao Chang +Date: Tue, 23 Jan 2024 13:40:32 +0800 +Subject: [PATCH 193/213] printk: nbcon: move locked_port flag to struct + uart_port + +Console pointer in uart_port might be shared among multiple uart +ports. Flag port locked by nbcon should be saved in uart_port +structure instead of in console structure. + +Fixes: 6424f396c49e ("printk: nbcon: Implement processing in port->lock wrapper") +Suggested-by: John Ogness +Signed-off-by: Junxiao Chang +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/all/20240123054033.183114-2-junxiao.chang@intel.com +(cherry picked from commit d4fb86a96cb4a1efd24ca13a2ac234a1c9a3fdc5) +Signed-off-by: Clark Williams +--- + include/linux/console.h | 2 -- + include/linux/serial_core.h | 1 + + kernel/printk/nbcon.c | 8 ++++---- + 3 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 756e5008d828..b4d58071463e 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -309,7 +309,6 @@ struct nbcon_write_context { + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer +- * @locked_port: True, if the port lock is locked by nbcon + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context +@@ -343,7 +342,6 @@ struct console { + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; +- bool locked_port; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index 7c861b4606aa..6de3d7aab17d 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -489,6 +489,7 @@ struct uart_port { + struct uart_icount icount; /* statistics */ - debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); + struct console *cons; /* struct console, if any */ ++ bool nbcon_locked_port; /* True, if the port is locked by nbcon */ + /* flags must be updated while holding port mutex */ + upf_t flags; -+ debugfs_create_file("hog", 0200, debugfs_sched, NULL, &sched_hog_fops); -+ - return 0; - } - late_initcall(sched_init_debug); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 2808dbdd0..bea994a42 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -1022,8 +1022,10 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); - * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i - * this is 
probably good enough. - */ --static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) -+static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se, bool tick) - { -+ struct rq *rq = rq_of(cfs_rq); -+ - if ((s64)(se->vruntime - se->deadline) < 0) +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index 1b1b585b1675..b53d93585ee7 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1586,7 +1586,7 @@ void nbcon_acquire(struct uart_port *up) + if (!uart_is_nbcon(up)) return; -@@ -1042,10 +1044,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) - /* - * The task has consumed its request, reschedule. - */ -- if (cfs_rq->nr_running > 1) { -- resched_curr(rq_of(cfs_rq)); -- clear_buddies(cfs_rq, se); -+ if (cfs_rq->nr_running < 2) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) || sched_feat(FORCE_NEED_RESCHED)) { -+ resched_curr(rq); -+ } else { -+ /* Did the task ignore the lazy reschedule request? */ -+ if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY)) -+ resched_curr(rq); -+ else -+ resched_curr_lazy(rq); - } -+ clear_buddies(cfs_rq, se); - } +- WARN_ON_ONCE(con->locked_port); ++ WARN_ON_ONCE(up->nbcon_locked_port); - #include "pelt.h" -@@ -1198,7 +1209,7 @@ s64 update_curr_common(struct rq *rq) - /* - * Update the current task's runtime statistics. - */ --static void update_curr(struct cfs_rq *cfs_rq) -+static void __update_curr(struct cfs_rq *cfs_rq, bool tick) - { - struct sched_entity *curr = cfs_rq->curr; - s64 delta_exec; -@@ -1211,7 +1222,7 @@ static void update_curr(struct cfs_rq *cfs_rq) - return; + do { + do { +@@ -1597,7 +1597,7 @@ void nbcon_acquire(struct uart_port *up) - curr->vruntime += calc_delta_fair(delta_exec, curr); -- update_deadline(cfs_rq, curr); -+ update_deadline(cfs_rq, curr, tick); - update_min_vruntime(cfs_rq); + } while (!nbcon_context_enter_unsafe(&ctxt)); - if (entity_is_task(curr)) -@@ -1220,6 +1231,11 @@ static void update_curr(struct cfs_rq *cfs_rq) - account_cfs_rq_runtime(cfs_rq, delta_exec); +- con->locked_port = true; ++ up->nbcon_locked_port = true; } + EXPORT_SYMBOL_GPL(nbcon_acquire); -+static inline void update_curr(struct cfs_rq *cfs_rq) -+{ -+ __update_curr(cfs_rq, false); -+} -+ - static void update_curr_fair(struct rq *rq) - { - update_curr(cfs_rq_of(&rq->curr->se)); -@@ -5515,7 +5531,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) - /* - * Update run-time statistics of the 'current'. - */ -- update_curr(cfs_rq); -+ __update_curr(cfs_rq, true); +@@ -1623,13 +1623,13 @@ void nbcon_release(struct uart_port *up) + .prio = NBCON_PRIO_NORMAL, + }; - /* - * Ensure that runnable average is periodically updated. -@@ -5529,7 +5545,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) - * validating it and just reschedule. 
- */ - if (queued) { -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); +- if (!con->locked_port) ++ if (!up->nbcon_locked_port) return; - } - /* -@@ -5675,7 +5691,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) - * hierarchy can be throttled - */ - if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - } - static __always_inline -@@ -5935,7 +5951,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + if (nbcon_context_exit_unsafe(&ctxt)) + nbcon_context_release(&ctxt); - /* Determine whether we need to wake up potentially idle CPU: */ - if (rq->curr == rq->idle && rq->cfs.nr_running) -- resched_curr(rq); -+ resched_curr_lazy(rq); +- con->locked_port = false; ++ up->nbcon_locked_port = false; } - - #ifdef CONFIG_SMP -@@ -6640,7 +6656,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) - - if (delta < 0) { - if (task_current(rq, p)) -- resched_curr(rq); -+ resched_curr_lazy(rq); - return; + EXPORT_SYMBOL_GPL(nbcon_release); + +-- +2.51.0 + +From 2ef298a3d06dda62793ef5950cb9e3e8555e2b0d Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Tue, 25 Jun 2024 17:08:39 -0500 +Subject: [PATCH 194/213] Linux 6.6.35-rt34 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 9a218ca23053..21988f9ad53f 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt23 ++-rt34 +-- +2.51.0 + +From 8deb847ac97c777dc5aa530ed1a6b8a8a58f6f36 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Thu, 20 Jun 2024 11:21:13 +0200 +Subject: [PATCH 195/213] prinkt/nbcon: Add a scheduling point to + nbcon_kthread_func(). + +Constant printing can lead to a CPU hog in nbcon_kthread_func(). The +context is preemptible but on !PREEMPT kernels there is no explicit +preemption point which leads softlockup warnings. + +Add an explicit preemption point in nbcon_kthread_func(). + +Reported-by: Derek Barbosa +Link: https://lore.kernel.org/ZnHF5j1DUDjN1kkq@debarbos-thinkpadt14sgen2i.remote.csb +Reviewed-by: John Ogness +Acked-by: Andrew Halaney +Tested-by: Andrew Halaney +Acked-by: Derek Barbosa +Tested-by: Derek Barbosa +Link: https://lore.kernel.org/r/20240620094300.YJlW043f@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams +--- + kernel/printk/nbcon.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c +index b53d93585ee7..b941039ee7d2 100644 +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1081,6 +1081,7 @@ static int nbcon_kthread_func(void *__console) } - hrtick_start(rq, delta); -@@ -8318,7 +8334,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int - * prevents us from potentially nominating it as a false LAST_BUDDY - * below. 
- */ -- if (test_tsk_need_resched(curr)) -+ if (need_resched()) - return; - if (!sched_feat(WAKEUP_PREEMPTION)) -@@ -8356,7 +8372,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int - return; + console_srcu_read_unlock(cookie); ++ cond_resched(); - preempt: -- resched_curr(rq); -+ resched_curr_lazy(rq); - } + } while (backlog); + +-- +2.51.0 + +From 0afcf0044bdcd87cee9690cbef56b0ac4848e32c Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Sun, 28 Jul 2024 15:53:21 -0500 +Subject: [PATCH 196/213] Linux 6.6.43-rt38 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 21988f9ad53f..49bae8d6aa67 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt34 ++-rt38 +-- +2.51.0 + +From 7118e033e7f1d512160d916c9208fec158dda8f2 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 5 Aug 2024 09:48:12 -0500 +Subject: [PATCH 197/213] riscv: Add return value to check_unaligned_access(). + +The stable backport of commit c20d36cc2a207 ("riscv: don't probe +unaligned access speed if already done") added a `return' stament to +check_unaligned_access(). The return value of the function is `void' but +the RT tree has the backport of commit 73cf6167ef44c ("RISC-V: Probe +misaligned access speed in parallel") which changes the return type to +`int'. + +Make the return statement return 0. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams +--- + arch/riscv/kernel/cpufeature.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c +index c3cdcad4f324..66d2b0455dcc 100644 +--- a/arch/riscv/kernel/cpufeature.c ++++ b/arch/riscv/kernel/cpufeature.c +@@ -592,7 +592,7 @@ static int check_unaligned_access(void *param) - #ifdef CONFIG_SMP -@@ -12511,7 +12527,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr) - */ - if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 && - __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) -- resched_curr(rq); -+ resched_curr_lazy(rq); - } + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) +- return; ++ return 0; - /* -@@ -12676,7 +12692,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) - */ - if (task_current(rq, p)) { - if (p->prio > oldprio) -- resched_curr(rq); -+ resched_curr_lazy(rq); - } else - wakeup_preempt(rq, p, 0); - } -diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index f77016823..dd8b35f67 100644 ---- a/kernel/sched/features.h -+++ b/kernel/sched/features.h -@@ -89,3 +89,5 @@ SCHED_FEAT(UTIL_EST_FASTUP, true) - SCHED_FEAT(LATENCY_WARN, false) + /* Make an unaligned destination buffer. 
*/
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+--
+2.51.0
+
+From 07a691a85b9c740bc5b058a4a8b3a0542069fb18 Mon Sep 17 00:00:00 2001
+From: Clark Williams
+Date: Thu, 24 Oct 2024 13:54:42 -0500
+Subject: [PATCH 198/213] Linux 6.6.58-rt45 REBASE
+
+Signed-off-by: Clark Williams
+---
+ localversion-rt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/localversion-rt b/localversion-rt
+index 49bae8d6aa67..38c40b21a885 100644
+--- a/localversion-rt
++++ b/localversion-rt
+@@ -1 +1 @@
+--rt38
++-rt45
+--
+2.51.0
+
+From 83d038dec5c9cdb3fc6ab7998481678205ddc68e Mon Sep 17 00:00:00 2001
+From: Clark Williams
+Date: Tue, 26 Nov 2024 08:37:58 -0600
+Subject: [PATCH 199/213] Linux 6.6.63-rt46 REBASE
+
+Signed-off-by: Clark Williams
+---
+ localversion-rt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/localversion-rt b/localversion-rt
+index 38c40b21a885..272158183778 100644
+--- a/localversion-rt
++++ b/localversion-rt
+@@ -1 +1 @@
+--rt45
++-rt46
+--
+2.51.0
+
+From d3693f871519085c585b0a0cd33a97932f4e920d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Mon, 28 Oct 2024 01:07:26 +0000
+Subject: [PATCH 200/213] kvm: Use the proper mask to check for the LAZY bit.
+
+To check for the LAZY bit in ti_work, one must use
+_TIF_NEED_RESCHED_LAZY, which contains the shifted bit (the mask),
+instead of TIF_NEED_RESCHED_LAZY, which contains the bit number.
+
+This broke during the switch from the old PREEMPT_LAZY code to
+PREEMPT_AUTO.
+
+Use _TIF_NEED_RESCHED_LAZY instead of TIF_NEED_RESCHED_LAZY.
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Clark Williams
+---
+ kernel/entry/kvm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
+index d952fa5ee880..5253d3d2d013 100644
+--- a/kernel/entry/kvm.c
++++ b/kernel/entry/kvm.c
+@@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
+ return -EINTR;
+ }
+
+- if (ti_work & (_TIF_NEED_RESCHED | TIF_NEED_RESCHED_LAZY))
++ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
+ schedule();
+
+ if (ti_work & _TIF_NOTIFY_RESUME)
+--
+2.51.0
+
+From 4dc91524a478ad76dcb027ba2cd6c481d5dc79fd Mon Sep 17 00:00:00 2001
+From: Derek Barbosa
+Date: Mon, 4 Nov 2024 09:29:34 -0500
+Subject: [PATCH 201/213] printk: nbcon: Fix illegal RCU usage on thread wakeup
+
+In debug kernels, printk'ing during the SMP startup of a secondary CPU
+results in a splat on boot that details illegal RCU usage from
+offline CPUs.
+
+This patch aligns rcuwait_has_sleeper() with what currently exists in
+Torvalds' tree, at commit 76f258bf3f2aa, and will avoid the
+aforementioned splat.
+
+Furthermore, this patch avoids the hacky rcu_dereferencing through the
+waiter's task struct, and instead leverages the correct RCU API with
+the rcuwait_active() function.
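+
+For illustration, the simplification reduces to the following condensed
+before/after (a sketch only; the authoritative change is the hunk below,
+and rcuwait_active() is assumed to wrap the rcu_access_pointer() check
+from include/linux/rcuwait.h):
+
+	/* before: open-coded check under an explicit RCU read-side section */
+	rcu_read_lock();
+	has_sleeper = !!rcu_dereference(w->task);
+	rcu_read_unlock();
+	return has_sleeper;
+
+	/* after: the rcuwait API performs the same task-pointer check */
+	return rcuwait_active(w);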
+
+Reported-by: Andrew Halaney
+Closes: https://lore.kernel.org/linux-rt-users/5x4nejpojrtny37k7l6loewqwuaituq77zc3tkkojvawcuvmml@thwg65fdb3vn
+Reviewed-by: Sebastian Andrzej Siewior
+Reviewed-by: John Ogness
+Signed-off-by: Derek Barbosa
+---
+ kernel/printk/nbcon.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
+index b941039ee7d2..b4278854eeb9 100644
+--- a/kernel/printk/nbcon.c
++++ b/kernel/printk/nbcon.c
+@@ -1101,9 +1101,6 @@ static void nbcon_irq_work(struct irq_work *irq_work)

 static inline bool rcuwait_has_sleeper(struct rcuwait *w)
 {
+- bool has_sleeper;
+-
+- rcu_read_lock();
 /*
 * Guarantee any new records can be seen by tasks preparing to wait
 * before this context checks if the rcuwait is empty.
+@@ -1116,10 +1113,7 @@ static inline bool rcuwait_has_sleeper(struct rcuwait *w)
 * This pairs with nbcon_kthread_func:A.
 */
 smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
+- has_sleeper = !!rcu_dereference(w->task);
+- rcu_read_unlock();
+-
+- return has_sleeper;
++ return rcuwait_active(w);
 }

 /**
+--
+2.51.0
+
+From d32eb74c78466624cd8946056f2298609546f578 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior
+Date: Fri, 1 Mar 2024 22:45:25 +0100
+Subject: [PATCH 202/213] serial: sifive: Use uart_prepare_sysrq_char() to
+ handle sysrq.
+
+The port lock is a spinlock_t which becomes a sleeping lock on PREEMPT_RT.
+The driver splits the locking function into two parts: local_irq_save() and
+uart_port_lock(), and this breaks PREEMPT_RT.
+
+Handle sysrq requests once the port lock is dropped.
+Remove the special case in the console write routine and always use the
+complete locking function.
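+
+The resulting receive/IRQ pattern is the standard PREEMPT_RT-safe one
+(a minimal sketch, assuming the uart_prepare_sysrq_char() and
+uart_unlock_and_check_sysrq() helpers from serial_core; the complete
+hunks follow):
+
+	/* RX path: record a possible sysrq character instead of acting on it */
+	if (!uart_prepare_sysrq_char(&ssp->port, ch))
+		uart_insert_char(&ssp->port, 0, 0, ch, TTY_NORMAL);
+
+	/* IRQ handler exit: replay any recorded sysrq after dropping the lock */
+	uart_unlock_and_check_sysrq(&ssp->port);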
+ +Cc: Palmer Dabbelt +Cc: Paul Walmsley +Cc: linux-riscv@lists.infradead.org +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/r/20240301215246.891055-13-bigeasy@linutronix.de +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 32c694ec3efc2b7cdf921da50371297ba70e7d50) +Signed-off-by: Clark Williams +--- + drivers/tty/serial/sifive.c | 17 +++++++---------- + 1 file changed, 7 insertions(+), 10 deletions(-) + +diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c +index fea6b999ba0e..2d7b696fea49 100644 +--- a/drivers/tty/serial/sifive.c ++++ b/drivers/tty/serial/sifive.c +@@ -412,7 +412,8 @@ static void __ssp_receive_chars(struct sifive_serial_port *ssp) + break; -- if (cpu < nr_cpu_ids) -+ if (cpu < nr_cpu_ids) { -+ if (!has_pushable_tasks(cpu_rq(cpu))) -+ continue; - return cpu; -+ } + ssp->port.icount.rx++; +- uart_insert_char(&ssp->port, 0, 0, ch, TTY_NORMAL); ++ if (!uart_prepare_sysrq_char(&ssp->port, ch)) ++ uart_insert_char(&ssp->port, 0, 0, ch, TTY_NORMAL); + } - rd->rto_cpu = -1; + tty_flip_buffer_push(&ssp->port.state->port); +@@ -534,7 +535,7 @@ static irqreturn_t sifive_serial_irq(int irq, void *dev_id) + if (ip & SIFIVE_SERIAL_IP_TXWM_MASK) + __ssp_transmit_chars(ssp); -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index d48c6a292..0bdd8f7b6 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2444,6 +2444,7 @@ extern void init_sched_fair_class(void); - extern void reweight_task(struct task_struct *p, const struct load_weight *lw); +- uart_port_unlock(&ssp->port); ++ uart_unlock_and_check_sysrq(&ssp->port); - extern void resched_curr(struct rq *rq); -+extern void resched_curr_lazy(struct rq *rq); - extern void resched_cpu(int cpu); + return IRQ_HANDLED; + } +@@ -797,13 +798,10 @@ static void sifive_serial_console_write(struct console *co, const char *s, + if (!ssp) + return; - extern struct rt_bandwidth def_rt_bandwidth; -diff --git a/kernel/signal.c b/kernel/signal.c -index 49c8c24b4..107953e8a 100644 ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -2331,15 +2331,35 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, - do_notify_parent_cldstop(current, false, why); +- local_irq_save(flags); +- if (ssp->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = uart_port_trylock(&ssp->port); ++ if (oops_in_progress) ++ locked = uart_port_trylock_irqsave(&ssp->port, &flags); + else +- uart_port_lock(&ssp->port); ++ uart_port_lock_irqsave(&ssp->port, &flags); - /* -- * Don't want to allow preemption here, because -- * sys_ptrace() needs this task to be inactive. -+ * The previous do_notify_parent_cldstop() invocation woke ptracer. -+ * One a PREEMPTION kernel this can result in preemption requirement -+ * which will be fulfilled after read_unlock() and the ptracer will be -+ * put on the CPU. -+ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for -+ * this task wait in schedule(). If this task gets preempted then it -+ * remains enqueued on the runqueue. The ptracer will observe this and -+ * then sleep for a delay of one HZ tick. In the meantime this task -+ * gets scheduled, enters schedule() and will wait for the ptracer. - * -- * XXX: implement read_unlock_no_resched(). -+ * This preemption point is not bad from correctness point of view but -+ * extends the runtime by one HZ tick time due to the ptracer's sleep. 
-+ * The preempt-disable section ensures that there will be no preemption -+ * between unlock and schedule() and so improving the performance since -+ * the ptracer has no reason to sleep. -+ * -+ * On PREEMPT_RT locking tasklist_lock does not disable preemption. -+ * Therefore the task can be preempted (after -+ * do_notify_parent_cldstop()) before unlocking tasklist_lock so there -+ * is no benefit in doing this. The optimisation is harmful on -+ * PEEMPT_RT because the spinlock_t (in cgroup_enter_frozen()) must not -+ * be acquired with disabled preemption. - */ -- preempt_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); - read_unlock(&tasklist_lock); - cgroup_enter_frozen(); -- preempt_enable_no_resched(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable_no_resched(); - schedule(); - cgroup_leave_frozen(true); + ier = __ssp_readl(ssp, SIFIVE_SERIAL_IE_OFFS); + __ssp_writel(0, SIFIVE_SERIAL_IE_OFFS, ssp); +@@ -813,8 +811,7 @@ static void sifive_serial_console_write(struct console *co, const char *s, + __ssp_writel(ier, SIFIVE_SERIAL_IE_OFFS, ssp); -diff --git a/kernel/softirq.c b/kernel/softirq.c -index f24d80cf2..e3b9ffd7f 100644 ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -247,6 +247,19 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) + if (locked) +- uart_port_unlock(&ssp->port); +- local_irq_restore(flags); ++ uart_port_unlock_irqrestore(&ssp->port, flags); } - EXPORT_SYMBOL(__local_bh_enable_ip); -+void softirq_preempt(void) -+{ -+ if (WARN_ON_ONCE(!preemptible())) -+ return; -+ -+ if (WARN_ON_ONCE(__this_cpu_read(softirq_ctrl.cnt) != SOFTIRQ_OFFSET)) -+ return; + static int sifive_serial_console_setup(struct console *co, char *options) +-- +2.51.0 + +From b3eeb6ae41064e6ee97a75de2e18b4e3c6eea61f Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Tue, 17 Dec 2024 08:04:45 -0600 +Subject: [PATCH 203/213] Linux 6.6.65-rt47 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 272158183778..8a777ac42aab 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt46 ++-rt47 +-- +2.51.0 + +From 15a01a38466248259f416338e166e9fe85e8f8c9 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Mon, 27 Jan 2025 11:25:21 -0600 +Subject: [PATCH 204/213] Revert "sign-file,extract-cert: use pkcs11 provider + for OPENSSL MAJOR >= 3" + +This reverts commit 3b89983cc22f6c7adf79c922a55738f97b4381df. +--- + certs/extract-cert.c | 103 +++++++++++++------------------------------ + scripts/sign-file.c | 93 ++++++++++++-------------------------- + 2 files changed, 58 insertions(+), 138 deletions(-) + +diff --git a/certs/extract-cert.c b/certs/extract-cert.c +index 7d6d468ed612..61bbe0085671 100644 +--- a/certs/extract-cert.c ++++ b/certs/extract-cert.c +@@ -21,18 +21,17 @@ + #include + #include + #include +-#if OPENSSL_VERSION_MAJOR >= 3 +-# define USE_PKCS11_PROVIDER +-# include +-# include +-#else +-# if !defined(OPENSSL_NO_ENGINE) && !defined(OPENSSL_NO_DEPRECATED_3_0) +-# define USE_PKCS11_ENGINE +-# include +-# endif +-#endif ++#include + -+ __local_bh_enable(SOFTIRQ_OFFSET, true); -+ /* preemption point */ -+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); -+} + #include "ssl-common.h" + ++/* ++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. 
++ * ++ * Remove this if/when that API is no longer used ++ */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + - /* - * Invoked from ksoftirqd_run() outside of the interrupt disabled section - * to acquire the per CPU local lock for reentrancy protection. -@@ -630,6 +643,24 @@ static inline void tick_irq_exit(void) - #endif + #define PKEY_ID_PKCS7 2 + + static __attribute__((noreturn)) +@@ -62,66 +61,6 @@ static void write_cert(X509 *x509) + fprintf(stderr, "Extracted cert: %s\n", buf); } -+#ifdef CONFIG_PREEMPT_RT -+DEFINE_PER_CPU(struct task_struct *, timersd); -+DEFINE_PER_CPU(unsigned long, pending_timer_softirq); -+ -+static void wake_timersd(void) -+{ -+ struct task_struct *tsk = __this_cpu_read(timersd); -+ -+ if (tsk) -+ wake_up_process(tsk); -+} -+ -+#else -+ -+static inline void wake_timersd(void) { } -+ -+#endif -+ - static inline void __irq_exit_rcu(void) +-static X509 *load_cert_pkcs11(const char *cert_src) +-{ +- X509 *cert = NULL; +-#ifdef USE_PKCS11_PROVIDER +- OSSL_STORE_CTX *store; +- +- if (!OSSL_PROVIDER_try_load(NULL, "pkcs11", true)) +- ERR(1, "OSSL_PROVIDER_try_load(pkcs11)"); +- if (!OSSL_PROVIDER_try_load(NULL, "default", true)) +- ERR(1, "OSSL_PROVIDER_try_load(default)"); +- +- store = OSSL_STORE_open(cert_src, NULL, NULL, NULL, NULL); +- ERR(!store, "OSSL_STORE_open"); +- +- while (!OSSL_STORE_eof(store)) { +- OSSL_STORE_INFO *info = OSSL_STORE_load(store); +- +- if (!info) { +- drain_openssl_errors(__LINE__, 0); +- continue; +- } +- if (OSSL_STORE_INFO_get_type(info) == OSSL_STORE_INFO_CERT) { +- cert = OSSL_STORE_INFO_get1_CERT(info); +- ERR(!cert, "OSSL_STORE_INFO_get1_CERT"); +- } +- OSSL_STORE_INFO_free(info); +- if (cert) +- break; +- } +- OSSL_STORE_close(store); +-#elif defined(USE_PKCS11_ENGINE) +- ENGINE *e; +- struct { +- const char *cert_id; +- X509 *cert; +- } parms; +- +- parms.cert_id = cert_src; +- parms.cert = NULL; +- +- ENGINE_load_builtin_engines(); +- drain_openssl_errors(__LINE__, 1); +- e = ENGINE_by_id("pkcs11"); +- ERR(!e, "Load PKCS#11 ENGINE"); +- if (ENGINE_init(e)) +- drain_openssl_errors(__LINE__, 1); +- else +- ERR(1, "ENGINE_init"); +- if (key_pass) +- ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); +- ENGINE_ctrl_cmd(e, "LOAD_CERT_CTRL", 0, &parms, NULL, 1); +- ERR(!parms.cert, "Get X.509 from PKCS#11"); +- cert = parms.cert; +-#else +- fprintf(stderr, "no pkcs11 engine/provider available\n"); +- exit(1); +-#endif +- return cert; +-} +- + int main(int argc, char **argv) { - #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED -@@ -642,6 +673,10 @@ static inline void __irq_exit_rcu(void) - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); + char *cert_src; +@@ -150,10 +89,28 @@ int main(int argc, char **argv) + fclose(f); + exit(0); + } else if (!strncmp(cert_src, "pkcs11:", 7)) { +- X509 *cert = load_cert_pkcs11(cert_src); ++ ENGINE *e; ++ struct { ++ const char *cert_id; ++ X509 *cert; ++ } parms; + +- ERR(!cert, "load_cert_pkcs11 failed"); +- write_cert(cert); ++ parms.cert_id = cert_src; ++ parms.cert = NULL; ++ ++ ENGINE_load_builtin_engines(); ++ drain_openssl_errors(__LINE__, 1); ++ e = ENGINE_by_id("pkcs11"); ++ ERR(!e, "Load PKCS#11 ENGINE"); ++ if (ENGINE_init(e)) ++ drain_openssl_errors(__LINE__, 1); ++ else ++ ERR(1, "ENGINE_init"); ++ if (key_pass) ++ ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); ++ ENGINE_ctrl_cmd(e, "LOAD_CERT_CTRL", 0, &parms, NULL, 1); ++ ERR(!parms.cert, "Get X.509 from PKCS#11"); ++ write_cert(parms.cert); + } else { + BIO *b; + 
X509 *x509; +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index 7070245edfc1..bb3fdf1a617c 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -27,18 +27,17 @@ + #include + #include + #include +-#if OPENSSL_VERSION_MAJOR >= 3 +-# define USE_PKCS11_PROVIDER +-# include +-# include +-#else +-# if !defined(OPENSSL_NO_ENGINE) && !defined(OPENSSL_NO_DEPRECATED_3_0) +-# define USE_PKCS11_ENGINE +-# include +-# endif +-#endif ++#include ++ + #include "ssl-common.h" -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && -+ !(in_nmi() | in_hardirq())) -+ wake_timersd(); ++/* ++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. ++ * ++ * Remove this if/when that API is no longer used ++ */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + - tick_irq_exit(); + /* + * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to + * assume that it's not available and its header file is missing and that we +@@ -107,64 +106,28 @@ static int pem_pw_cb(char *buf, int len, int w, void *v) + return pwlen; } -@@ -974,12 +1009,70 @@ static struct smp_hotplug_thread softirq_threads = { - .thread_comm = "ksoftirqd/%u", - }; - -+#ifdef CONFIG_PREEMPT_RT -+static void timersd_setup(unsigned int cpu) -+{ -+ sched_set_fifo_low(current); -+} -+ -+static int timersd_should_run(unsigned int cpu) -+{ -+ return local_pending_timers(); -+} -+ -+static void run_timersd(unsigned int cpu) -+{ -+ unsigned int timer_si; -+ -+ ksoftirqd_run_begin(); -+ -+ timer_si = local_pending_timers(); -+ __this_cpu_write(pending_timer_softirq, 0); -+ or_softirq_pending(timer_si); -+ -+ __do_softirq(); -+ -+ ksoftirqd_run_end(); -+} -+ -+static void raise_ktimers_thread(unsigned int nr) -+{ -+ trace_softirq_raise(nr); -+ __this_cpu_or(pending_timer_softirq, 1 << nr); -+} -+ -+void raise_hrtimer_softirq(void) -+{ -+ raise_ktimers_thread(HRTIMER_SOFTIRQ); -+} -+ -+void raise_timer_softirq(void) -+{ -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ raise_ktimers_thread(TIMER_SOFTIRQ); -+ wake_timersd(); -+ local_irq_restore(flags); -+} -+ -+static struct smp_hotplug_thread timer_threads = { -+ .store = &timersd, -+ .setup = timersd_setup, -+ .thread_should_run = timersd_should_run, -+ .thread_fn = run_timersd, -+ .thread_comm = "ktimers/%u", -+}; -+#endif -+ - static __init int spawn_ksoftirqd(void) +-static EVP_PKEY *read_private_key_pkcs11(const char *private_key_name) ++static EVP_PKEY *read_private_key(const char *private_key_name) { - cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, - takeover_tasklets); - BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- EVP_PKEY *private_key = NULL; +-#ifdef USE_PKCS11_PROVIDER +- OSSL_STORE_CTX *store; - -+#ifdef CONFIG_PREEMPT_RT -+ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); -+#endif - return 0; - } - early_initcall(spawn_ksoftirqd); -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 877535b06..07cc1b209 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1814,7 +1814,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) - if (!ktime_before(now, cpu_base->softirq_expires_next)) { - cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; -- raise_softirq_irqoff(HRTIMER_SOFTIRQ); -+ raise_hrtimer_softirq(); - } +- if (!OSSL_PROVIDER_try_load(NULL, "pkcs11", true)) +- ERR(1, "OSSL_PROVIDER_try_load(pkcs11)"); +- if (!OSSL_PROVIDER_try_load(NULL, "default", true)) +- ERR(1, "OSSL_PROVIDER_try_load(default)"); +- +- store = 
OSSL_STORE_open(private_key_name, NULL, NULL, NULL, NULL); +- ERR(!store, "OSSL_STORE_open"); ++ EVP_PKEY *private_key; - __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); -@@ -1927,7 +1927,7 @@ void hrtimer_run_queues(void) - if (!ktime_before(now, cpu_base->softirq_expires_next)) { - cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; -- raise_softirq_irqoff(HRTIMER_SOFTIRQ); -+ raise_hrtimer_softirq(); - } +- while (!OSSL_STORE_eof(store)) { +- OSSL_STORE_INFO *info = OSSL_STORE_load(store); ++ if (!strncmp(private_key_name, "pkcs11:", 7)) { ++ ENGINE *e; - __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); -diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c -index 55cbc49f7..1a0ed106b 100644 ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -795,7 +795,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) +- if (!info) { +- drain_openssl_errors(__LINE__, 0); +- continue; +- } +- if (OSSL_STORE_INFO_get_type(info) == OSSL_STORE_INFO_PKEY) { +- private_key = OSSL_STORE_INFO_get1_PKEY(info); +- ERR(!private_key, "OSSL_STORE_INFO_get1_PKEY"); +- } +- OSSL_STORE_INFO_free(info); +- if (private_key) +- break; +- } +- OSSL_STORE_close(store); +-#elif defined(USE_PKCS11_ENGINE) +- ENGINE *e; +- +- ENGINE_load_builtin_engines(); +- drain_openssl_errors(__LINE__, 1); +- e = ENGINE_by_id("pkcs11"); +- ERR(!e, "Load PKCS#11 ENGINE"); +- if (ENGINE_init(e)) ++ ENGINE_load_builtin_engines(); + drain_openssl_errors(__LINE__, 1); +- else +- ERR(1, "ENGINE_init"); +- if (key_pass) +- ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); +- private_key = ENGINE_load_private_key(e, private_key_name, NULL, NULL); +- ERR(!private_key, "%s", private_key_name); +-#else +- fprintf(stderr, "no pkcs11 engine/provider available\n"); +- exit(1); +-#endif +- return private_key; +-} +- +-static EVP_PKEY *read_private_key(const char *private_key_name) +-{ +- if (!strncmp(private_key_name, "pkcs11:", 7)) { +- return read_private_key_pkcs11(private_key_name); ++ e = ENGINE_by_id("pkcs11"); ++ ERR(!e, "Load PKCS#11 ENGINE"); ++ if (ENGINE_init(e)) ++ drain_openssl_errors(__LINE__, 1); ++ else ++ ERR(1, "ENGINE_init"); ++ if (key_pass) ++ ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), ++ "Set PKCS#11 PIN"); ++ private_key = ENGINE_load_private_key(e, private_key_name, ++ NULL, NULL); ++ ERR(!private_key, "%s", private_key_name); + } else { +- EVP_PKEY *private_key; + BIO *b; + + b = BIO_new_file(private_key_name, "rb"); +@@ -173,9 +136,9 @@ static EVP_PKEY *read_private_key(const char *private_key_name) + NULL); + ERR(!private_key, "%s", private_key_name); + BIO_free(b); +- +- return private_key; + } ++ ++ return private_key; + } + + static X509 *read_x509(const char *x509_name) +-- +2.51.0 + +From 4b362d26e428d19b76203ba7bb1572c0ebb0d7ae Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Mon, 27 Jan 2025 11:25:32 -0600 +Subject: [PATCH 205/213] Revert "sign-file,extract-cert: avoid using + deprecated ERR_get_error_line()" + +This reverts commit 2a9206dfd9297ee544de2d240be22466d8dc094b. 
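+
+In outline, the revert swaps the peek-then-pop error loop back to the
+variant that pops each queued error as it is printed (a sketch only;
+the full hunks are below):
+
+	/* restored: ERR_get_error_line() consumes the error it reports */
+	while ((e = ERR_get_error_line(&file, &line))) {
+		ERR_error_string(e, buf);
+		fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line);
+	}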
+--- + certs/extract-cert.c | 4 ++-- + scripts/sign-file.c | 6 +++--- + scripts/ssl-common.h | 23 +++++++++++++++-------- + 3 files changed, 20 insertions(+), 13 deletions(-) + +diff --git a/certs/extract-cert.c b/certs/extract-cert.c +index 61bbe0085671..8e7ba9974a1f 100644 +--- a/certs/extract-cert.c ++++ b/certs/extract-cert.c +@@ -99,11 +99,11 @@ int main(int argc, char **argv) + parms.cert = NULL; + + ENGINE_load_builtin_engines(); +- drain_openssl_errors(__LINE__, 1); ++ drain_openssl_errors(); + e = ENGINE_by_id("pkcs11"); + ERR(!e, "Load PKCS#11 ENGINE"); + if (ENGINE_init(e)) +- drain_openssl_errors(__LINE__, 1); ++ drain_openssl_errors(); + else + ERR(1, "ENGINE_init"); + if (key_pass) +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index bb3fdf1a617c..39ba58db5d4e 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -114,11 +114,11 @@ static EVP_PKEY *read_private_key(const char *private_key_name) + ENGINE *e; + + ENGINE_load_builtin_engines(); +- drain_openssl_errors(__LINE__, 1); ++ drain_openssl_errors(); + e = ENGINE_by_id("pkcs11"); + ERR(!e, "Load PKCS#11 ENGINE"); + if (ENGINE_init(e)) +- drain_openssl_errors(__LINE__, 1); ++ drain_openssl_errors(); + else + ERR(1, "ENGINE_init"); + if (key_pass) +@@ -273,7 +273,7 @@ int main(int argc, char **argv) + + /* Digest the module data. */ + OpenSSL_add_all_digests(); +- drain_openssl_errors(__LINE__, 0); ++ display_openssl_errors(__LINE__); + digest_algo = EVP_get_digestbyname(hash_algo); + ERR(!digest_algo, "EVP_get_digestbyname"); + +diff --git a/scripts/ssl-common.h b/scripts/ssl-common.h +index 2db0e181143c..e6711c75ed91 100644 +--- a/scripts/ssl-common.h ++++ b/scripts/ssl-common.h +@@ -3,7 +3,7 @@ + * SSL helper functions shared by sign-file and extract-cert. + */ - static inline bool local_timer_softirq_pending(void) +-static void drain_openssl_errors(int l, int silent) ++static void display_openssl_errors(int l) { -- return local_softirq_pending() & BIT(TIMER_SOFTIRQ); -+ return local_pending_timers() & BIT(TIMER_SOFTIRQ); + const char *file; + char buf[120]; +@@ -11,21 +11,28 @@ static void drain_openssl_errors(int l, int silent) + + if (ERR_peek_error() == 0) + return; +- if (!silent) +- fprintf(stderr, "At main.c:%d:\n", l); ++ fprintf(stderr, "At main.c:%d:\n", l); + +- while ((e = ERR_peek_error_line(&file, &line))) { ++ while ((e = ERR_get_error_line(&file, &line))) { + ERR_error_string(e, buf); +- if (!silent) +- fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); +- ERR_get_error(); ++ fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); + } } - static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 63a8ce717..b3fbe97d1 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -1470,9 +1470,16 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) - */ - static void timer_sync_wait_running(struct timer_base *base) - { -- if (atomic_read(&base->timer_waiters)) { -+ bool need_preempt; -+ -+ need_preempt = task_is_pi_boosted(current); -+ if (need_preempt || atomic_read(&base->timer_waiters)) { - raw_spin_unlock_irq(&base->lock); - spin_unlock(&base->expiry_lock); ++static void drain_openssl_errors(void) ++{ ++ const char *file; ++ int line; + -+ if (need_preempt) -+ softirq_preempt(); ++ if (ERR_peek_error() == 0) ++ return; ++ while (ERR_get_error_line(&file, &line)) {} ++} + - spin_lock(&base->expiry_lock); - raw_spin_lock_irq(&base->lock); - } -@@ -2054,7 +2061,7 @@ static void 
run_local_timers(void) - if (time_before(jiffies, base->next_expiry)) - return; - } -- raise_softirq(TIMER_SOFTIRQ); -+ raise_timer_softirq(); + #define ERR(cond, fmt, ...) \ + do { \ + bool __cond = (cond); \ +- drain_openssl_errors(__LINE__, 0); \ ++ display_openssl_errors(__LINE__); \ + if (__cond) { \ + errx(1, fmt, ## __VA_ARGS__); \ + } \ +-- +2.51.0 + +From 25649a0a0d031bcd20bbdc0a0a63437bd9e6358f Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Mon, 27 Jan 2025 11:25:42 -0600 +Subject: [PATCH 206/213] Revert "sign-file,extract-cert: move common SSL + helper functions to a header" + +This reverts commit 20a60c4db6e2ddd1b2e9a68b9a6291524b75611a. +--- + MAINTAINERS | 1 - + certs/Makefile | 2 +- + certs/extract-cert.c | 37 +++++++++++++++++++++++++++++++++++-- + scripts/sign-file.c | 37 +++++++++++++++++++++++++++++++++++-- + scripts/ssl-common.h | 39 --------------------------------------- + 5 files changed, 71 insertions(+), 45 deletions(-) + delete mode 100644 scripts/ssl-common.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 294d2ce29b73..ae4c0cec5073 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -4784,7 +4784,6 @@ S: Maintained + F: Documentation/admin-guide/module-signing.rst + F: certs/ + F: scripts/sign-file.c +-F: scripts/ssl-common.h + F: tools/certs/ + + CFAG12864B LCD DRIVER +diff --git a/certs/Makefile b/certs/Makefile +index 67e1f2707c2f..799ad7b9e68a 100644 +--- a/certs/Makefile ++++ b/certs/Makefile +@@ -84,5 +84,5 @@ targets += x509_revocation_list + + hostprogs := extract-cert + +-HOSTCFLAGS_extract-cert.o = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) -I$(srctree)/scripts ++HOSTCFLAGS_extract-cert.o = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) + HOSTLDLIBS_extract-cert = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) +diff --git a/certs/extract-cert.c b/certs/extract-cert.c +index 8e7ba9974a1f..70e9ec89d87d 100644 +--- a/certs/extract-cert.c ++++ b/certs/extract-cert.c +@@ -23,8 +23,6 @@ + #include + #include + +-#include "ssl-common.h" +- + /* + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. + * +@@ -42,6 +40,41 @@ void format(void) + exit(2); } ++static void display_openssl_errors(int l) ++{ ++ const char *file; ++ char buf[120]; ++ int e, line; ++ ++ if (ERR_peek_error() == 0) ++ return; ++ fprintf(stderr, "At main.c:%d:\n", l); ++ ++ while ((e = ERR_get_error_line(&file, &line))) { ++ ERR_error_string(e, buf); ++ fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); ++ } ++} ++ ++static void drain_openssl_errors(void) ++{ ++ const char *file; ++ int line; ++ ++ if (ERR_peek_error() == 0) ++ return; ++ while (ERR_get_error_line(&file, &line)) {} ++} ++ ++#define ERR(cond, fmt, ...) \ ++ do { \ ++ bool __cond = (cond); \ ++ display_openssl_errors(__LINE__); \ ++ if (__cond) { \ ++ err(1, fmt, ## __VA_ARGS__); \ ++ } \ ++ } while(0) ++ + static const char *key_pass; + static BIO *wb; + static char *cert_dst; +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index 39ba58db5d4e..3edb156ae52c 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -29,8 +29,6 @@ + #include + #include + +-#include "ssl-common.h" +- /* -diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index 9d9af60b2..47fa2b613 100644 ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -2706,6 +2706,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. 
+ * +@@ -85,6 +83,41 @@ void format(void) + exit(2); + } - if (tif_need_resched()) - trace_flags |= TRACE_FLAG_NEED_RESCHED; -+ if (tif_need_resched_lazy()) -+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; - if (test_preempt_need_resched()) - trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; - return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | -diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c -index 2b948d35f..959f09ab8 100644 ---- a/kernel/trace/trace_output.c -+++ b/kernel/trace/trace_output.c -@@ -464,17 +464,29 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) - (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - bh_off ? 'b' : -- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : -+ !IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' : - '.'; ++static void display_openssl_errors(int l) ++{ ++ const char *file; ++ char buf[120]; ++ int e, line; ++ ++ if (ERR_peek_error() == 0) ++ return; ++ fprintf(stderr, "At main.c:%d:\n", l); ++ ++ while ((e = ERR_get_error_line(&file, &line))) { ++ ERR_error_string(e, buf); ++ fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); ++ } ++} ++ ++static void drain_openssl_errors(void) ++{ ++ const char *file; ++ int line; ++ ++ if (ERR_peek_error() == 0) ++ return; ++ while (ERR_get_error_line(&file, &line)) {} ++} ++ ++#define ERR(cond, fmt, ...) \ ++ do { \ ++ bool __cond = (cond); \ ++ display_openssl_errors(__LINE__); \ ++ if (__cond) { \ ++ errx(1, fmt, ## __VA_ARGS__); \ ++ } \ ++ } while(0) ++ + static const char *key_pass; + + static int pem_pw_cb(char *buf, int len, int w, void *v) +diff --git a/scripts/ssl-common.h b/scripts/ssl-common.h +deleted file mode 100644 +index e6711c75ed91..000000000000 +--- a/scripts/ssl-common.h ++++ /dev/null +@@ -1,39 +0,0 @@ +-/* SPDX-License-Identifier: LGPL-2.1+ */ +-/* +- * SSL helper functions shared by sign-file and extract-cert. +- */ +- +-static void display_openssl_errors(int l) +-{ +- const char *file; +- char buf[120]; +- int e, line; +- +- if (ERR_peek_error() == 0) +- return; +- fprintf(stderr, "At main.c:%d:\n", l); +- +- while ((e = ERR_get_error_line(&file, &line))) { +- ERR_error_string(e, buf); +- fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); +- } +-} +- +-static void drain_openssl_errors(void) +-{ +- const char *file; +- int line; +- +- if (ERR_peek_error() == 0) +- return; +- while (ERR_get_error_line(&file, &line)) {} +-} +- +-#define ERR(cond, fmt, ...) \ +- do { \ +- bool __cond = (cond); \ +- display_openssl_errors(__LINE__); \ +- if (__cond) { \ +- errx(1, fmt, ## __VA_ARGS__); \ +- } \ +- } while (0) +-- +2.51.0 + +From eb8bfdc6cab3340d44e1538c4eb2ee47907be267 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Mon, 27 Jan 2025 11:07:03 -0600 +Subject: [PATCH 207/213] misc: fix missing hunks from previous mis-merge + +Dropped hunks were discovered by Sebastian during the v6.6.66 +merge conflict resolution. Fix those. 
+ +Reported-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams + +Changes to be committed: + modified: drivers/gpu/drm/i915/gt/intel_breadcrumbs.c + modified: drivers/tty/serial/sc16is7xx.c +--- + drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 7 +------ + drivers/tty/serial/sc16is7xx.c | 5 ----- + 2 files changed, 1 insertion(+), 11 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +index aa77f8601b8a..f2973cd1a8aa 100644 +--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c ++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +@@ -315,12 +315,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) + return; -- switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | -+ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | - TRACE_FLAG_PREEMPT_RESCHED)) { -+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: -+ need_resched = 'B'; -+ break; - case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'N'; - break; -+ case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: -+ need_resched = 'L'; -+ break; -+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY: -+ need_resched = 'b'; -+ break; - case TRACE_FLAG_NEED_RESCHED: - need_resched = 'n'; - break; -+ case TRACE_FLAG_NEED_RESCHED_LAZY: -+ need_resched = 'l'; -+ break; - case TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'p'; - break; + /* Kick the work once more to drain the signalers, and disarm the irq */ +- irq_work_sync(&b->irq_work); +- while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { +- irq_work_queue(&b->irq_work); +- cond_resched(); +- irq_work_sync(&b->irq_work); +- } ++ irq_work_queue(&b->irq_work); + } + + void intel_breadcrumbs_free(struct kref *kref) +diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c +index fd0f03965c11..8a2ce2ca6b39 100644 +--- a/drivers/tty/serial/sc16is7xx.c ++++ b/drivers/tty/serial/sc16is7xx.c +@@ -821,7 +821,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) + { + struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); +- unsigned long flags; + + if ((port->rs485.flags & SER_RS485_ENABLED) && + (port->rs485.delay_rts_before_send > 0)) +@@ -830,10 +829,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) + mutex_lock(&one->efr_lock); + sc16is7xx_handle_tx(port); + mutex_unlock(&one->efr_lock); +- +- uart_port_lock_irqsave(port, &flags); +- sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); +- uart_port_unlock_irqrestore(port, flags); + } + + static void sc16is7xx_reconf_rs485(struct uart_port *port) +-- +2.51.0 + +From 00f794b08411887f990fbc62dea54d02debea3dc Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Fri, 28 Feb 2025 11:40:08 -0600 +Subject: [PATCH 208/213] Linux 6.6.78-rt51 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + diff --git a/localversion-rt b/localversion-rt -new file mode 100644 -index 000000000..e6c0dc6a5 ---- /dev/null +index 8a777ac42aab..75493460c41f 100644 +--- a/localversion-rt +++ b/localversion-rt -@@ -0,0 +1 @@ -+-rt53 -diff --git a/net/core/dev.c b/net/core/dev.c -index c31a7f7be..23e49d9a7 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4746,15 +4746,6 @@ static void rps_trigger_softirq(void *data) +@@ -1 +1 @@ +--rt47 ++-rt51 +-- +2.51.0 + +From 4c4740ed9a19a7945120b628e7ff4fd0ad8d9584 Mon Sep 17 
00:00:00 2001 +From: Clark Williams +Date: Mon, 14 Apr 2025 18:23:49 -0500 +Subject: [PATCH 209/213] Linux 6.6.87-rt54 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 75493460c41f..3165a8781ff5 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt51 ++-rt54 +-- +2.51.0 + +From 3cd12d8931589bb236af54f0ed471b4588c1dfc0 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Sun, 25 May 2025 14:40:32 -0500 +Subject: [PATCH 210/213] certs/scripts: update to match v6.6.y + +updates to certificate handling + +Update certs/Makefile and scripts/sign-file.c and add the +MAINTAINERS: update CERTIFICATE HANDLING +Added new file scripts/ssl-common.h + +Signed-off-by: Clark Williams +--- + MAINTAINERS | 1 + + certs/Makefile | 2 +- + certs/extract-cert.c | 138 +++++++++++++++++++++++-------------------- + scripts/sign-file.c | 132 +++++++++++++++++++++-------------------- + scripts/ssl-common.h | 32 ++++++++++ + 5 files changed, 176 insertions(+), 129 deletions(-) + create mode 100644 scripts/ssl-common.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index ae4c0cec5073..294d2ce29b73 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -4784,6 +4784,7 @@ S: Maintained + F: Documentation/admin-guide/module-signing.rst + F: certs/ + F: scripts/sign-file.c ++F: scripts/ssl-common.h + F: tools/certs/ + + CFAG12864B LCD DRIVER +diff --git a/certs/Makefile b/certs/Makefile +index 799ad7b9e68a..67e1f2707c2f 100644 +--- a/certs/Makefile ++++ b/certs/Makefile +@@ -84,5 +84,5 @@ targets += x509_revocation_list + + hostprogs := extract-cert + +-HOSTCFLAGS_extract-cert.o = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) ++HOSTCFLAGS_extract-cert.o = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) -I$(srctree)/scripts + HOSTLDLIBS_extract-cert = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) +diff --git a/certs/extract-cert.c b/certs/extract-cert.c +index 70e9ec89d87d..7d6d468ed612 100644 +--- a/certs/extract-cert.c ++++ b/certs/extract-cert.c +@@ -21,14 +21,17 @@ + #include + #include + #include +-#include +- +-/* +- * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. +- * +- * Remove this if/when that API is no longer used +- */ +-#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++#if OPENSSL_VERSION_MAJOR >= 3 ++# define USE_PKCS11_PROVIDER ++# include ++# include ++#else ++# if !defined(OPENSSL_NO_ENGINE) && !defined(OPENSSL_NO_DEPRECATED_3_0) ++# define USE_PKCS11_ENGINE ++# include ++# endif ++#endif ++#include "ssl-common.h" - #endif /* CONFIG_RPS */ + #define PKEY_ID_PKCS7 2 --/* Called from hardirq (IPI) context */ --static void trigger_rx_softirq(void *data) +@@ -40,41 +43,6 @@ void format(void) + exit(2); + } + +-static void display_openssl_errors(int l) -{ -- struct softnet_data *sd = data; +- const char *file; +- char buf[120]; +- int e, line; - -- __raise_softirq_irqoff(NET_RX_SOFTIRQ); -- smp_store_release(&sd->defer_ipi_scheduled, 0); +- if (ERR_peek_error() == 0) +- return; +- fprintf(stderr, "At main.c:%d:\n", l); +- +- while ((e = ERR_get_error_line(&file, &line))) { +- ERR_error_string(e, buf); +- fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); +- } -} - - /* - * After we queued a packet into sd->input_pkt_queue, - * we need to make sure this queue is serviced soon. 
-@@ -6723,6 +6714,32 @@ static void skb_defer_free_flush(struct softnet_data *sd) - } +-static void drain_openssl_errors(void) +-{ +- const char *file; +- int line; +- +- if (ERR_peek_error() == 0) +- return; +- while (ERR_get_error_line(&file, &line)) {} +-} +- +-#define ERR(cond, fmt, ...) \ +- do { \ +- bool __cond = (cond); \ +- display_openssl_errors(__LINE__); \ +- if (__cond) { \ +- err(1, fmt, ## __VA_ARGS__); \ +- } \ +- } while(0) +- + static const char *key_pass; + static BIO *wb; + static char *cert_dst; +@@ -94,6 +62,66 @@ static void write_cert(X509 *x509) + fprintf(stderr, "Extracted cert: %s\n", buf); } -+#ifndef CONFIG_PREEMPT_RT -+ -+/* Called from hardirq (IPI) context */ -+static void trigger_rx_softirq(void *data) ++static X509 *load_cert_pkcs11(const char *cert_src) +{ -+ struct softnet_data *sd = data; -+ -+ __raise_softirq_irqoff(NET_RX_SOFTIRQ); -+ smp_store_release(&sd->defer_ipi_scheduled, 0); -+} ++ X509 *cert = NULL; ++#ifdef USE_PKCS11_PROVIDER ++ OSSL_STORE_CTX *store; + -+#else ++ if (!OSSL_PROVIDER_try_load(NULL, "pkcs11", true)) ++ ERR(1, "OSSL_PROVIDER_try_load(pkcs11)"); ++ if (!OSSL_PROVIDER_try_load(NULL, "default", true)) ++ ERR(1, "OSSL_PROVIDER_try_load(default)"); + -+static void trigger_rx_softirq(struct work_struct *defer_work) -+{ -+ struct softnet_data *sd; ++ store = OSSL_STORE_open(cert_src, NULL, NULL, NULL, NULL); ++ ERR(!store, "OSSL_STORE_open"); + -+ sd = container_of(defer_work, struct softnet_data, defer_work); -+ smp_store_release(&sd->defer_ipi_scheduled, 0); -+ local_bh_disable(); -+ skb_defer_free_flush(sd); -+ local_bh_enable(); -+} ++ while (!OSSL_STORE_eof(store)) { ++ OSSL_STORE_INFO *info = OSSL_STORE_load(store); + ++ if (!info) { ++ drain_openssl_errors(__LINE__, 0); ++ continue; ++ } ++ if (OSSL_STORE_INFO_get_type(info) == OSSL_STORE_INFO_CERT) { ++ cert = OSSL_STORE_INFO_get1_CERT(info); ++ ERR(!cert, "OSSL_STORE_INFO_get1_CERT"); ++ } ++ OSSL_STORE_INFO_free(info); ++ if (cert) ++ break; ++ } ++ OSSL_STORE_close(store); ++#elif defined(USE_PKCS11_ENGINE) ++ ENGINE *e; ++ struct { ++ const char *cert_id; ++ X509 *cert; ++ } parms; ++ ++ parms.cert_id = cert_src; ++ parms.cert = NULL; ++ ++ ENGINE_load_builtin_engines(); ++ drain_openssl_errors(__LINE__, 1); ++ e = ENGINE_by_id("pkcs11"); ++ ERR(!e, "Load PKCS#11 ENGINE"); ++ if (ENGINE_init(e)) ++ drain_openssl_errors(__LINE__, 1); ++ else ++ ERR(1, "ENGINE_init"); ++ if (key_pass) ++ ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); ++ ENGINE_ctrl_cmd(e, "LOAD_CERT_CTRL", 0, &parms, NULL, 1); ++ ERR(!parms.cert, "Get X.509 from PKCS#11"); ++ cert = parms.cert; ++#else ++ fprintf(stderr, "no pkcs11 engine/provider available\n"); ++ exit(1); +#endif ++ return cert; ++} + - static int napi_threaded_poll(void *data) + int main(int argc, char **argv) { - struct napi_struct *napi = data; -@@ -11664,7 +11681,11 @@ static int __init net_dev_init(void) - INIT_CSD(&sd->csd, rps_trigger_softirq, sd); - sd->cpu = i; - #endif -+#ifndef CONFIG_PREEMPT_RT - INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); + char *cert_src; +@@ -122,28 +150,10 @@ int main(int argc, char **argv) + fclose(f); + exit(0); + } else if (!strncmp(cert_src, "pkcs11:", 7)) { +- ENGINE *e; +- struct { +- const char *cert_id; +- X509 *cert; +- } parms; +- +- parms.cert_id = cert_src; +- parms.cert = NULL; ++ X509 *cert = load_cert_pkcs11(cert_src); + +- ENGINE_load_builtin_engines(); +- drain_openssl_errors(); +- e = ENGINE_by_id("pkcs11"); +- ERR(!e, "Load PKCS#11 ENGINE"); +- if 
(ENGINE_init(e)) +- drain_openssl_errors(); +- else +- ERR(1, "ENGINE_init"); +- if (key_pass) +- ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); +- ENGINE_ctrl_cmd(e, "LOAD_CERT_CTRL", 0, &parms, NULL, 1); +- ERR(!parms.cert, "Get X.509 from PKCS#11"); +- write_cert(parms.cert); ++ ERR(!cert, "load_cert_pkcs11 failed"); ++ write_cert(cert); + } else { + BIO *b; + X509 *x509; +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index 3edb156ae52c..7070245edfc1 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -27,14 +27,17 @@ + #include + #include + #include +-#include +- +-/* +- * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. +- * +- * Remove this if/when that API is no longer used +- */ +-#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++#if OPENSSL_VERSION_MAJOR >= 3 ++# define USE_PKCS11_PROVIDER ++# include ++# include +#else -+ INIT_WORK(&sd->defer_work, trigger_rx_softirq); ++# if !defined(OPENSSL_NO_ENGINE) && !defined(OPENSSL_NO_DEPRECATED_3_0) ++# define USE_PKCS11_ENGINE ++# include ++# endif +#endif - spin_lock_init(&sd->defer_lock); ++#include "ssl-common.h" - init_gro_hash(&sd->backlog); -diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index 21a83e26f..4c42f9b7e 100644 ---- a/net/core/skbuff.c -+++ b/net/core/skbuff.c -@@ -6863,8 +6863,13 @@ nodefer: __kfree_skb(skb); - /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU - * if we are unlucky enough (this seems very unlikely). - */ -- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) -+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { -+#ifndef CONFIG_PREEMPT_RT - smp_call_function_single_async(cpu, &sd->defer_csd); + /* + * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to +@@ -83,41 +86,6 @@ void format(void) + exit(2); + } + +-static void display_openssl_errors(int l) +-{ +- const char *file; +- char buf[120]; +- int e, line; +- +- if (ERR_peek_error() == 0) +- return; +- fprintf(stderr, "At main.c:%d:\n", l); +- +- while ((e = ERR_get_error_line(&file, &line))) { +- ERR_error_string(e, buf); +- fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); +- } +-} +- +-static void drain_openssl_errors(void) +-{ +- const char *file; +- int line; +- +- if (ERR_peek_error() == 0) +- return; +- while (ERR_get_error_line(&file, &line)) {} +-} +- +-#define ERR(cond, fmt, ...) 
\ +- do { \ +- bool __cond = (cond); \ +- display_openssl_errors(__LINE__); \ +- if (__cond) { \ +- errx(1, fmt, ## __VA_ARGS__); \ +- } \ +- } while(0) +- + static const char *key_pass; + + static int pem_pw_cb(char *buf, int len, int w, void *v) +@@ -139,28 +107,64 @@ static int pem_pw_cb(char *buf, int len, int w, void *v) + return pwlen; + } + +-static EVP_PKEY *read_private_key(const char *private_key_name) ++static EVP_PKEY *read_private_key_pkcs11(const char *private_key_name) + { +- EVP_PKEY *private_key; ++ EVP_PKEY *private_key = NULL; ++#ifdef USE_PKCS11_PROVIDER ++ OSSL_STORE_CTX *store; + ++ if (!OSSL_PROVIDER_try_load(NULL, "pkcs11", true)) ++ ERR(1, "OSSL_PROVIDER_try_load(pkcs11)"); ++ if (!OSSL_PROVIDER_try_load(NULL, "default", true)) ++ ERR(1, "OSSL_PROVIDER_try_load(default)"); ++ ++ store = OSSL_STORE_open(private_key_name, NULL, NULL, NULL, NULL); ++ ERR(!store, "OSSL_STORE_open"); ++ ++ while (!OSSL_STORE_eof(store)) { ++ OSSL_STORE_INFO *info = OSSL_STORE_load(store); ++ ++ if (!info) { ++ drain_openssl_errors(__LINE__, 0); ++ continue; ++ } ++ if (OSSL_STORE_INFO_get_type(info) == OSSL_STORE_INFO_PKEY) { ++ private_key = OSSL_STORE_INFO_get1_PKEY(info); ++ ERR(!private_key, "OSSL_STORE_INFO_get1_PKEY"); ++ } ++ OSSL_STORE_INFO_free(info); ++ if (private_key) ++ break; ++ } ++ OSSL_STORE_close(store); ++#elif defined(USE_PKCS11_ENGINE) ++ ENGINE *e; ++ ++ ENGINE_load_builtin_engines(); ++ drain_openssl_errors(__LINE__, 1); ++ e = ENGINE_by_id("pkcs11"); ++ ERR(!e, "Load PKCS#11 ENGINE"); ++ if (ENGINE_init(e)) ++ drain_openssl_errors(__LINE__, 1); ++ else ++ ERR(1, "ENGINE_init"); ++ if (key_pass) ++ ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), "Set PKCS#11 PIN"); ++ private_key = ENGINE_load_private_key(e, private_key_name, NULL, NULL); ++ ERR(!private_key, "%s", private_key_name); +#else -+ schedule_work_on(cpu, &sd->defer_work); ++ fprintf(stderr, "no pkcs11 engine/provider available\n"); ++ exit(1); +#endif ++ return private_key; ++} ++ ++static EVP_PKEY *read_private_key(const char *private_key_name) ++{ + if (!strncmp(private_key_name, "pkcs11:", 7)) { +- ENGINE *e; +- +- ENGINE_load_builtin_engines(); +- drain_openssl_errors(); +- e = ENGINE_by_id("pkcs11"); +- ERR(!e, "Load PKCS#11 ENGINE"); +- if (ENGINE_init(e)) +- drain_openssl_errors(); +- else +- ERR(1, "ENGINE_init"); +- if (key_pass) +- ERR(!ENGINE_ctrl_cmd_string(e, "PIN", key_pass, 0), +- "Set PKCS#11 PIN"); +- private_key = ENGINE_load_private_key(e, private_key_name, +- NULL, NULL); +- ERR(!private_key, "%s", private_key_name); ++ return read_private_key_pkcs11(private_key_name); + } else { ++ EVP_PKEY *private_key; + BIO *b; + + b = BIO_new_file(private_key_name, "rb"); +@@ -169,9 +173,9 @@ static EVP_PKEY *read_private_key(const char *private_key_name) + NULL); + ERR(!private_key, "%s", private_key_name); + BIO_free(b); +- } + +- return private_key; ++ return private_key; + } } - static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, + static X509 *read_x509(const char *x509_name) +@@ -306,7 +310,7 @@ int main(int argc, char **argv) + + /* Digest the module data. 
*/ + OpenSSL_add_all_digests(); +- display_openssl_errors(__LINE__); ++ drain_openssl_errors(__LINE__, 0); + digest_algo = EVP_get_digestbyname(hash_algo); + ERR(!digest_algo, "EVP_get_digestbyname"); + +diff --git a/scripts/ssl-common.h b/scripts/ssl-common.h +new file mode 100644 +index 000000000000..2db0e181143c +--- /dev/null ++++ b/scripts/ssl-common.h +@@ -0,0 +1,32 @@ ++/* SPDX-License-Identifier: LGPL-2.1+ */ ++/* ++ * SSL helper functions shared by sign-file and extract-cert. ++ */ ++ ++static void drain_openssl_errors(int l, int silent) ++{ ++ const char *file; ++ char buf[120]; ++ int e, line; ++ ++ if (ERR_peek_error() == 0) ++ return; ++ if (!silent) ++ fprintf(stderr, "At main.c:%d:\n", l); ++ ++ while ((e = ERR_peek_error_line(&file, &line))) { ++ ERR_error_string(e, buf); ++ if (!silent) ++ fprintf(stderr, "- SSL %s: %s:%d\n", buf, file, line); ++ ERR_get_error(); ++ } ++} ++ ++#define ERR(cond, fmt, ...) \ ++ do { \ ++ bool __cond = (cond); \ ++ drain_openssl_errors(__LINE__, 0); \ ++ if (__cond) { \ ++ errx(1, fmt, ## __VA_ARGS__); \ ++ } \ ++ } while (0) +-- +2.51.0 + +From 51b41f9b0c701abdcd1a59d451151d1c5e15807d Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Fri, 20 Jun 2025 10:24:28 -0500 +Subject: [PATCH 211/213] Linux 6.6.94-rt56 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 3165a8781ff5..fdb0f880c7e9 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt54 ++-rt56 +-- +2.51.0 + +From 628674de31171594edc5a5f5c3c1a61c49c3491a Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Fri, 4 Jul 2025 12:42:07 -0500 +Subject: [PATCH 212/213] arch/riscv: fix conflicting prototypes for + check_unaligned_access + +Fix mismatch between prototype in cpufeature.h and actual definition +of the function in cpufeature.c + +Reported-by: KernelCI bot +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Clark Williams +--- + arch/riscv/include/asm/cpufeature.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h +index 13b7d35648a9..788fd575c21a 100644 +--- a/arch/riscv/include/asm/cpufeature.h ++++ b/arch/riscv/include/asm/cpufeature.h +@@ -30,7 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed); + /* Per-cpu ISA extensions. */ + extern struct riscv_isainfo hart_isa[NR_CPUS]; + +-void check_unaligned_access(int cpu); + void riscv_user_isa_enable(void); + + #endif +-- +2.51.0 + +From 464e4b647f7456b063b57c16570cbe94a22626a4 Mon Sep 17 00:00:00 2001 +From: Clark Williams +Date: Tue, 9 Sep 2025 21:56:39 -0500 +Subject: [PATCH 213/213] Linux 6.6.104-rt60 REBASE + +Signed-off-by: Clark Williams +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index fdb0f880c7e9..66fa05e70f29 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt56 ++-rt60 +-- +2.51.0 + diff --git a/cgmanifest.json b/cgmanifest.json index ae9c0a1d5d3..f3013aed0a3 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -8351,8 +8351,8 @@ "type": "other", "other": { "name": "kernel-rt", - "version": "6.6.85.1", - "downloadUrl": "https://github.com/microsoft/CBL-Mariner-Linux-Kernel/archive/rolling-lts/mariner-3/6.6.85.1.tar.gz" + "version": "6.6.104.2", + "downloadUrl": "https://github.com/microsoft/CBL-Mariner-Linux-Kernel/archive/rolling-lts/mariner-3/6.6.104.2.tar.gz" } } },